From 1d211249cbca4e3e39f9e7a667b16868fa71fdd0 Mon Sep 17 00:00:00 2001 From: Akshay Singla Date: Sat, 6 Jun 2026 07:12:22 +0000 Subject: [PATCH 1/3] lakebox: clean comments and drop bugbash dir for open-source publication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scrubs internal-only references from cmd/lakebox/ in preparation for this code shipping in a public CLI release: - Drops employee-name attributions in code comments (config.go, list.go, start.go, state.go). - Drops bug-bash form IDs (F10, F22) — the technical context they hung off stands on its own. - Drops references to private-repo paths (lakebox/src/..., universe#1966484, lakebox.proto) and to internal SAFE flag names (lakeboxCheckerEnabled), internal subsystems (ESM, LakeboxChecker), and internal vocabulary (dogfood staging). - Drops the "no prod listener yet" roadmap mention from resolveGatewayHost — advertising rollout state isn't useful to OSS readers. - Fixes lakebox status help-text examples to include the `databricks` prefix for consistency with sibling commands. - Removes internal/bugbash/ entirely (install.sh + exec.sh + README). The snapshot-install script was an internal coordination tool tied to the demo-lakebox branch lifecycle and doesn't belong in a public repo. The release-build workflow still triggers on `bugbash-*` branches as a benign CI convention. No behavior change; pure comment + dead-file cleanup. Tests still pass. Co-authored-by: Isaac --- cmd/lakebox/api.go | 42 +++++------- cmd/lakebox/config.go | 11 +-- cmd/lakebox/list.go | 12 ++-- cmd/lakebox/ssh.go | 20 +++--- cmd/lakebox/ssh_test.go | 2 +- cmd/lakebox/start.go | 14 ++-- cmd/lakebox/state.go | 2 +- cmd/lakebox/status.go | 4 +- internal/bugbash/README.md | 13 ---- internal/bugbash/exec.sh | 132 ------------------------------------ internal/bugbash/install.sh | 60 ---------------- 11 files changed, 50 insertions(+), 262 deletions(-) delete mode 100644 internal/bugbash/README.md delete mode 100755 internal/bugbash/exec.sh delete mode 100755 internal/bugbash/install.sh diff --git a/cmd/lakebox/api.go b/cmd/lakebox/api.go index 2f19722541a..7ac1e296c98 100644 --- a/cmd/lakebox/api.go +++ b/cmd/lakebox/api.go @@ -23,18 +23,18 @@ func sandboxPath(id string) string { return lakeboxAPIPath + "/" + url.PathEscape(id) } -// Sandboxes live under the `/sandboxes` sub-collection of the lakebox service -// namespace (see `lakebox.proto` `LakeboxService.CreateSandbox`). +// Sandboxes live under the `/sandboxes` sub-collection of the lakebox +// service namespace. const lakeboxAPIPath = "/api/2.0/lakebox/sandboxes" // SSH keys are nested under the lakebox service namespace alongside -// `sandboxes/` (see `LakeboxService.CreateSshKey`). +// `sandboxes/`. const lakeboxKeysAPIPath = "/api/2.0/lakebox/ssh-keys" -// orgIDHeader is sent by multi-workspace gateways (e.g. dogfood staging) so -// the gateway can scope the credential to a specific workspace. Without it, -// requests fail with "Credential was not sent or was of an unsupported type -// for this API." +// orgIDHeader is sent by multi-workspace gateways so the gateway can +// scope the credential to a specific workspace. Without it, requests +// fail with "Credential was not sent or was of an unsupported type for +// this API." const orgIDHeader = "X-Databricks-Org-Id" // maxNameBytes mirrors the manager-side `Sandbox.name` length cap. The @@ -78,9 +78,9 @@ type createRequest struct { // // `FQDN` is the manager's internal routing hostname — not user-actionable. // `GatewayHost` is the public SSH gateway hostname for the workspace, -// stamped by the manager (universe#1966484) so the CLI no longer needs to -// hardcode regional defaults. Both are `omitempty` so old/new wire shapes -// round-trip cleanly. +// stamped by the manager so the CLI no longer needs to hardcode regional +// defaults. Both are `omitempty` so old/new wire shapes round-trip +// cleanly. type createResponse struct { SandboxID string `json:"sandboxId"` Status string `json:"status"` @@ -130,20 +130,16 @@ func (e *sandboxEntry) idleTimeoutSecs() int64 { } // autoStopLabel renders the auto-stop policy advertised by the manager -// for one sandbox into a short human-readable string. Mirrors the wire -// semantics from `lakebox/proto/lakebox.proto`: +// for one sandbox into a short human-readable string. Wire semantics: // - `no_autostop == true` → never auto-stops // - `idle_timeout` set and positive → that many seconds // - otherwise → no enforcement today; render as "never" // // The "otherwise" branch used to render a hardcoded `10m` claiming to -// mirror a manager-side `watchdog_idle_grace_secs` fallback. That -// fallback does not exist in the current tree (only a stale comment in -// `lakebox/proto/lakebox.proto`); the ESM-side `LakeboxChecker` is also -// gated off via the `lakeboxCheckerEnabled` SAFE flag, so unset -// `idle_timeout` is functionally "never auto-stops" today. Once the -// manager enforces a real default, swap this branch back to a duration -// label. +// mirror a server-side idle-grace fallback. That fallback is not +// currently enforced, so unset `idle_timeout` is functionally "never +// auto-stops" today. Once the server enforces a real default, swap +// this branch back to a duration label. func (e *sandboxEntry) autoStopLabel() string { if e.NoAutostop != nil && *e.NoAutostop { return "never" @@ -185,11 +181,9 @@ type listResponse struct { // `list` handles the rare larger fleet. const listPageSize = 100 -// updateBody is the PATCH request body. The proto declares -// `UpdateSandboxRequest { Sandbox sandbox = 1 }` with `body: "sandbox"` -// in the (google.api.http) annotation, so the HTTP body is the inner -// `Sandbox` message directly — there is no `{"sandbox": {...}}` -// wrapping on the wire. +// updateBody is the PATCH request body. The server takes the inner +// `Sandbox` message as the request body directly — there is no +// `{"sandbox": {...}}` wrapping on the wire. // // Pointer fields encode the proto3 `optional` semantics — only the // fields we explicitly set are emitted, leaving everything else diff --git a/cmd/lakebox/config.go b/cmd/lakebox/config.go index 6ca1d6350ce..de920ec9bb7 100644 --- a/cmd/lakebox/config.go +++ b/cmd/lakebox/config.go @@ -11,9 +11,9 @@ import ( "github.com/spf13/cobra" ) -// MIN_IDLE_TIMEOUT_SECS / MAX_IDLE_TIMEOUT_SECS mirror the manager-side -// constants in lakebox/src/api/handlers/sandbox.rs. Pre-flighting client-side -// gives a clearer error than waiting for the server's INVALID_ARGUMENT. +// minIdleTimeoutSecs / maxIdleTimeoutSecs mirror the server-side bounds +// on `idle_timeout`. Pre-flighting client-side gives a clearer error +// than waiting for the server's INVALID_ARGUMENT. const ( minIdleTimeoutSecs = 60 maxIdleTimeoutSecs = 86_400 @@ -159,8 +159,9 @@ func checkIdleSecs(secs int64) (int64, error) { if secs < minIdleTimeoutSecs || secs > maxIdleTimeoutSecs { // Format both the bounds and the offending value as Go-style // durations to match the input form the user typed and the - // flag's --help text (Anwell flagged the prior `86400s` / - // `90000s` echoes as confusing — same unit as input now). + // flag's --help text — echoing the bounds back in raw seconds + // (`86400s` / `90000s`) reads as a different unit and confuses + // the user about whether they typed the right thing. return 0, fmt.Errorf( "idle-timeout must be 0 (clear) or between %s and %s, got %s", formatDurationSecs(minIdleTimeoutSecs), diff --git a/cmd/lakebox/list.go b/cmd/lakebox/list.go index 83be0fba989..ecb42b4e044 100644 --- a/cmd/lakebox/list.go +++ b/cmd/lakebox/list.go @@ -100,12 +100,12 @@ Example: // `never`, `15m`, `1h30m` — 8 chars covers them. // // NAME is *always* rendered, even when no sandbox has a - // custom --name set: yunquan flagged on the bug-bash form - // that the prior auto-hide made the table shape change - // between calls (NAME appears the moment you set --name on - // any one box and vanishes when you clear them all), which - // breaks scripts and muscle memory. Sandboxes without a - // custom name render as `-` in the NAME cell. + // custom --name set, so the table shape stays stable across + // calls. Auto-hiding the column means it appears the moment + // you set --name on any one box and vanishes when you clear + // them all — that breaks scripts that parse the output and + // flickers under the user's eyes between calls. Sandboxes + // without a custom name render as `-` in the NAME cell. // // All column widths are measured in *terminal cells*, not // bytes or runes — emoji and CJK glyphs render as 2 cells diff --git a/cmd/lakebox/ssh.go b/cmd/lakebox/ssh.go index b0a7ad6c654..f97e5b76636 100644 --- a/cmd/lakebox/ssh.go +++ b/cmd/lakebox/ssh.go @@ -25,8 +25,7 @@ const ( // resolveGatewayHost picks the SSH gateway hostname based on the workspace host. // Staging workspaces (*.staging.cloud.databricks.com etc.) route through -// ue1.s.dbrx.dev; everything else uses uw2.dbrx.dev. Both are dev-tier -// listeners (`.dbrx.dev`); there is no prod listener yet. +// ue1.s.dbrx.dev; everything else uses uw2.dbrx.dev. func resolveGatewayHost(workspaceHost string) string { if strings.Contains(workspaceHost, ".staging.") { return stagingDefaultGatewayHost @@ -120,9 +119,8 @@ Examples: } // Verify the local key is still registered before opening - // the SSH socket. The gateway's publickey-callback already - // checks the key (see lakebox/src/ssh/handler.rs's - // verify_ssh_key), but SSH_MSG_USERAUTH_FAILURE has no + // the SSH socket. The gateway already checks the key + // during userauth, but SSH_MSG_USERAUTH_FAILURE has no // free-form reason field and SSH_MSG_USERAUTH_BANNER is // swallowed by many client wrappers — so the gateway can't // communicate "key not registered" through the SSH channel. @@ -238,11 +236,11 @@ Examples: // SSH protocol's reply surface (USERAUTH_FAILURE has no free-form // reason; USERAUTH_BANNER is widely swallowed) flattens "unknown // key", "key registered but not authorized for this sandbox", and -// "ESM is down" into the same "Permission denied (publickey)". This -// out-of-band HTTP check surfaces the specific case in language the -// user can act on. listKeys errors fall through with a warning so a -// transient API hiccup doesn't block a connection the gateway could -// still route. +// "upstream service is down" into the same "Permission denied +// (publickey)". This out-of-band HTTP check surfaces the specific +// case in language the user can act on. listKeys errors fall +// through with a warning so a transient API hiccup doesn't block a +// connection the gateway could still route. func verifyKeyRegistered(ctx context.Context, api *lakeboxAPI, keyPath string) error { pub, err := os.ReadFile(keyPath + ".pub") if err != nil { @@ -320,7 +318,7 @@ func execSSHDirect(lakeboxID, host, port, keyPath string, extraArgs []string) er // `lakebox ssh -- bash -c 'echo hi'` // Cobra splits this into `["bash", "-c", "echo hi"]`. ssh's join // produces `bash -c echo hi` on the wire, which bash re-splits into -// `-c=echo` and `$0=hi` — bug F22. We fix that by shell-quoting +// `-c=echo` and `$0=hi` — wrong. We fix that by shell-quoting // each arg before append, so the remote sees `bash -c 'echo hi'`. // // The heuristic: if there's exactly one extra arg, pass it untouched; diff --git a/cmd/lakebox/ssh_test.go b/cmd/lakebox/ssh_test.go index a88ad47c67d..a2558794a25 100644 --- a/cmd/lakebox/ssh_test.go +++ b/cmd/lakebox/ssh_test.go @@ -50,7 +50,7 @@ func TestBuildSSHArgsQuoting(t *testing.T) { expected: []string{"ls", "-la", "/tmp"}, }, { - name: "bash -c '' — third arg gets quoted (this is F22)", + name: "bash -c '' — third arg gets quoted", // The whole point: without the quoting, the remote shell // re-splits "bash -c echo hi" and bash's -c eats just "echo". extraArgs: []string{"bash", "-c", "echo hi"}, diff --git a/cmd/lakebox/start.go b/cmd/lakebox/start.go index dd6b7a5dd40..e2ed5025a56 100644 --- a/cmd/lakebox/start.go +++ b/cmd/lakebox/start.go @@ -13,13 +13,13 @@ import ( ) // Bounds for `start`'s "wait until Running" loop. The server's StartSandbox -// RPC returns immediately with status="Creating" (reused for cold start — -// see F10), so the CLI polls until it actually reaches Running. Matches -// `create`'s blocking semantics so scripts can chain start → ssh / start → -// config without racing the cold boot. The 10-minute timeout covers -// Mitch's observed cold-start range (5–13 minutes) for the common case; -// truly stuck sandboxes still surface as a timeout rather than hanging -// the script forever. +// RPC returns immediately with status="Creating" (reused for cold start), +// so the CLI polls until it actually reaches Running. Matches `create`'s +// blocking semantics so scripts can chain start → ssh / start → config +// without racing the cold boot. The 10-minute timeout covers the observed +// cold-start range (5–13 minutes) for the common case; truly stuck +// sandboxes still surface as a timeout rather than hanging the script +// forever. const ( startPollInterval = 2 * time.Second startWaitTimeout = 10 * time.Minute diff --git a/cmd/lakebox/state.go b/cmd/lakebox/state.go index e30160563bc..a943769923b 100644 --- a/cmd/lakebox/state.go +++ b/cmd/lakebox/state.go @@ -224,7 +224,7 @@ func upsertSandbox(ctx context.Context, profile, id, name string) error { // the profile's `GatewayHosts` entry — there is nothing for the // gateway hostname to apply to until the user creates a new sandbox, // and leaving the entry behind accumulates orphan state across the -// lifecycle of a profile (per Mitch's "Delete cleanup" CUJ). +// lifecycle of a profile. func removeSandbox(ctx context.Context, profile, id string) error { state, err := loadState(ctx) if err != nil { diff --git a/cmd/lakebox/status.go b/cmd/lakebox/status.go index 97744e03032..0a88b0b91cf 100644 --- a/cmd/lakebox/status.go +++ b/cmd/lakebox/status.go @@ -19,8 +19,8 @@ func newStatusCommand() *cobra.Command { Long: `Show detailed status of a Lakebox environment. Example: - lakebox status happy-panda-1234 - lakebox status happy-panda-1234 --json`, + databricks lakebox status happy-panda-1234 + databricks lakebox status happy-panda-1234 --json`, Args: cobra.ExactArgs(1), PreRunE: root.MustWorkspaceClient, ValidArgsFunction: completeSandboxIDs, diff --git a/internal/bugbash/README.md b/internal/bugbash/README.md deleted file mode 100644 index 941ab6227cc..00000000000 --- a/internal/bugbash/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# Bugbash - -The script in this directory can be used to conveniently exec into a shell -where a CLI build for a specific branch is made available. - -## Usage - -This script prompts if you do NOT have at least Bash 5 installed, -but works without command completion with earlier versions. - -```shell -bash <(curl -fsSL https://raw.githubusercontent.com/databricks/cli/main/internal/bugbash/exec.sh) my-branch -``` diff --git a/internal/bugbash/exec.sh b/internal/bugbash/exec.sh deleted file mode 100755 index 986f3498d32..00000000000 --- a/internal/bugbash/exec.sh +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail - -# Set the GitHub repository for the Databricks CLI. -export GH_REPO="databricks/cli" - -# Synthesize the archive name for the snapshot build. -function cli_snapshot_archive() { - name="databricks_cli" - - # Append OS - case "$(uname -s)" in - Linux) - name="${name}_linux" - ;; - Darwin) - name="${name}_darwin" - ;; - *) - echo "Unknown operating system: $(uname -s)" - return 1 - ;; - esac - - # Append architecture - case "$(uname -m)" in - x86_64) - name="${name}_amd64" - ;; - arm64|aarch64) - name="${name}_arm64" - ;; - *) - echo "Unknown architecture: $(uname -m)" - return 1 - ;; - esac - - echo "${name}.tar.gz" -} - -BRANCH=$1 -shift - -# Default to main branch if branch is not specified. -if [ -z "$BRANCH" ]; then - BRANCH=main -fi - -if [ -z "$BRANCH" ]; then - echo "Please specify which branch to bugbash..." - exit 1 -fi - -# Check if the "gh" command is available. -if ! command -v gh &> /dev/null; then - echo "The GitHub CLI (gh) is required to download the snapshot build." - echo "Install and configure it with:" - echo "" - echo " brew install gh" - echo " gh auth login" - echo "" - exit 1 -fi - -echo "Looking for a snapshot build of the Databricks CLI on branch $BRANCH..." - -# Find last successful build on $BRANCH. -last_successful_run_id=$( - gh run list -b "$BRANCH" -w release-build --json 'databaseId,conclusion' | - jq 'limit(1; .[] | select(.conclusion == "success")) | .databaseId' -) -if [ -z "$last_successful_run_id" ]; then - echo "Unable to find last successful build of the release-build workflow for branch $BRANCH." - exit 1 -fi - -# Create a temporary directory to download and extract the artifact. -dir=$(mktemp -d) - -# Download the artifact. -echo "Downloading the snapshot build..." -gh run download "$last_successful_run_id" -n cli -D "$dir/.download" - -# Extract the archive for this platform. -archive=$(cli_snapshot_archive) -if [ ! -f "$dir/.download/$archive" ]; then - echo "Archive not found: $archive" - echo "Available archives:" - ls "$dir/.download/" - exit 1 -fi - -mkdir -p "$dir/.bin" -tar -xzf "$dir/.download/$archive" -C "$dir/.bin" - -# Make CLI available on $PATH. -chmod +x "$dir/.bin/databricks" -export PATH="$dir/.bin:$PATH" - -# Set the prompt to indicate the bugbash environment and exec. -export PS1="(bugbash $BRANCH) \[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ " - -# Display completion instructions. -echo "" -echo "==================================================================" - -if [[ ${BASH_VERSINFO[0]} -lt 5 ]]; then - echo -en "\033[31m" - echo "You have Bash version < 5 installed... completion won't work." - echo -en "\033[0m" - echo "" - echo "Install it with:" - echo "" - echo " brew install bash bash-completion" - echo "" - echo "==================================================================" -fi - -echo "" -echo "To load completions in your current shell session:" -echo "" -echo " source /opt/homebrew/etc/profile.d/bash_completion.sh" -echo " source <(databricks completion bash)" -echo "" -echo "==================================================================" -echo "" - -# Exec into a new shell. -# Note: don't use zsh because on macOS it _always_ overwrites PS1. -exec /usr/bin/env bash diff --git a/internal/bugbash/install.sh b/internal/bugbash/install.sh deleted file mode 100755 index 4543393ac82..00000000000 --- a/internal/bugbash/install.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env bash -# Install the latest demo-lakebox snapshot of the databricks CLI to -# ~/.databricks-snapshot/bin/databricks. Re-run any time you want to -# pick up new commits on the branch — the latest successful CI -# release-build is always what you get. -# -# The install dir is NOT added to PATH by default to avoid shadowing -# any system `databricks` binary. The script prints activation options -# at the end; pick whichever fits your workflow. - -set -euo pipefail - -REPO="databricks/cli" -BRANCH="demo-lakebox" -INSTALL_DIR="$HOME/.databricks-snapshot/bin" - -OS="$(uname -s | tr 'A-Z' 'a-z')" -ARCH="$(uname -m)"; [[ $ARCH == x86_64 ]] && ARCH=amd64 -[[ $ARCH == aarch64 ]] && ARCH=arm64 -case "$OS" in linux|darwin) ;; *) echo "unsupported OS: $OS" >&2; exit 1 ;; esac -case "$ARCH" in amd64|arm64) ;; *) echo "unsupported arch: $ARCH" >&2; exit 1 ;; esac - -for tool in gh jq curl; do - command -v "$tool" >/dev/null 2>&1 || { echo "$tool not installed (try: brew install $tool)" >&2; exit 1; } -done - -mkdir -p "$INSTALL_DIR" - -echo "→ looking up latest successful release-build run on $REPO @ $BRANCH …" -rid=$(gh run list -b "$BRANCH" -w release-build -R "$REPO" \ - --json databaseId,conclusion --limit 5 \ - | jq -r 'limit(1; .[] | select(.conclusion=="success")) | .databaseId') -[[ -n $rid && $rid != null ]] || { echo "no successful release-build run found on $BRANCH" >&2; exit 1; } - -tmp="$(mktemp -d)" -trap 'rm -rf "$tmp"' EXIT -echo "→ downloading artifact from run $rid …" -( cd "$tmp" && gh run download "$rid" -R "$REPO" -n cli >/dev/null ) - -tar="$tmp/databricks_cli_${OS}_${ARCH}.tar.gz" -[[ -f $tar ]] || { echo "expected platform tarball not in artifact: $tar" >&2; exit 1; } -( cd "$tmp" && tar -xzf "$tar" ) -install -m 755 "$tmp/databricks" "$INSTALL_DIR/databricks" - -version=$("$INSTALL_DIR/databricks" --version) -echo -echo "✓ installed $version to $INSTALL_DIR/databricks (CI run $rid)" -echo -echo "The install dir is intentionally NOT on PATH. Activate it one of three ways:" -echo -echo " 1. Just this shell, alongside your normal databricks:" -echo " alias databricks=\"$INSTALL_DIR/databricks\"" -echo -echo " 2. All future shells (replaces system databricks for new terminals):" -echo " echo 'export PATH=\"\$HOME/.databricks-snapshot/bin:\$PATH\"' >> ~/.zshrc" -echo " source ~/.zshrc" -echo -echo " 3. Without shadowing — invoke by full path or a distinct name:" -echo " $INSTALL_DIR/databricks lakebox list" -echo " ln -s $INSTALL_DIR/databricks ~/.local/bin/databricks-snapshot # then run \`databricks-snapshot lakebox …\`" From a7aa7555f77fff39ca41f1b4aea595aa8f0e8684 Mon Sep 17 00:00:00 2001 From: Akshay Singla Date: Sat, 6 Jun 2026 07:14:52 +0000 Subject: [PATCH 2/3] lakebox: restore exec.sh and README.md to internal/bugbash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit deleted the entire `internal/bugbash/` directory, but exec.sh and README.md predate the lakebox work — Pieter authored them in #1844 (May 2024), Lennart and Andrew extended them, and they are linked via a public curl-pipe-bash URL that may be referenced from internal Slack/docs. Removing them without consulting the maintainers is out of scope for an OSS-comment-cleanup PR. Only install.sh stays deleted: it was added in #5359 specifically to install the demo-lakebox snapshot, and the long-term home for that script is no longer inside the public repo. Co-authored-by: Isaac --- cmd/lakebox/ssh.go | 2 +- internal/bugbash/README.md | 13 ++++ internal/bugbash/exec.sh | 132 +++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 internal/bugbash/README.md create mode 100755 internal/bugbash/exec.sh diff --git a/cmd/lakebox/ssh.go b/cmd/lakebox/ssh.go index f97e5b76636..c8af2057b24 100644 --- a/cmd/lakebox/ssh.go +++ b/cmd/lakebox/ssh.go @@ -318,7 +318,7 @@ func execSSHDirect(lakeboxID, host, port, keyPath string, extraArgs []string) er // `lakebox ssh -- bash -c 'echo hi'` // Cobra splits this into `["bash", "-c", "echo hi"]`. ssh's join // produces `bash -c echo hi` on the wire, which bash re-splits into -// `-c=echo` and `$0=hi` — wrong. We fix that by shell-quoting +// `-c=echo` and `$0=hi` — losing the second word. We fix that by shell-quoting // each arg before append, so the remote sees `bash -c 'echo hi'`. // // The heuristic: if there's exactly one extra arg, pass it untouched; diff --git a/internal/bugbash/README.md b/internal/bugbash/README.md new file mode 100644 index 00000000000..941ab6227cc --- /dev/null +++ b/internal/bugbash/README.md @@ -0,0 +1,13 @@ +# Bugbash + +The script in this directory can be used to conveniently exec into a shell +where a CLI build for a specific branch is made available. + +## Usage + +This script prompts if you do NOT have at least Bash 5 installed, +but works without command completion with earlier versions. + +```shell +bash <(curl -fsSL https://raw.githubusercontent.com/databricks/cli/main/internal/bugbash/exec.sh) my-branch +``` diff --git a/internal/bugbash/exec.sh b/internal/bugbash/exec.sh new file mode 100755 index 00000000000..986f3498d32 --- /dev/null +++ b/internal/bugbash/exec.sh @@ -0,0 +1,132 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Set the GitHub repository for the Databricks CLI. +export GH_REPO="databricks/cli" + +# Synthesize the archive name for the snapshot build. +function cli_snapshot_archive() { + name="databricks_cli" + + # Append OS + case "$(uname -s)" in + Linux) + name="${name}_linux" + ;; + Darwin) + name="${name}_darwin" + ;; + *) + echo "Unknown operating system: $(uname -s)" + return 1 + ;; + esac + + # Append architecture + case "$(uname -m)" in + x86_64) + name="${name}_amd64" + ;; + arm64|aarch64) + name="${name}_arm64" + ;; + *) + echo "Unknown architecture: $(uname -m)" + return 1 + ;; + esac + + echo "${name}.tar.gz" +} + +BRANCH=$1 +shift + +# Default to main branch if branch is not specified. +if [ -z "$BRANCH" ]; then + BRANCH=main +fi + +if [ -z "$BRANCH" ]; then + echo "Please specify which branch to bugbash..." + exit 1 +fi + +# Check if the "gh" command is available. +if ! command -v gh &> /dev/null; then + echo "The GitHub CLI (gh) is required to download the snapshot build." + echo "Install and configure it with:" + echo "" + echo " brew install gh" + echo " gh auth login" + echo "" + exit 1 +fi + +echo "Looking for a snapshot build of the Databricks CLI on branch $BRANCH..." + +# Find last successful build on $BRANCH. +last_successful_run_id=$( + gh run list -b "$BRANCH" -w release-build --json 'databaseId,conclusion' | + jq 'limit(1; .[] | select(.conclusion == "success")) | .databaseId' +) +if [ -z "$last_successful_run_id" ]; then + echo "Unable to find last successful build of the release-build workflow for branch $BRANCH." + exit 1 +fi + +# Create a temporary directory to download and extract the artifact. +dir=$(mktemp -d) + +# Download the artifact. +echo "Downloading the snapshot build..." +gh run download "$last_successful_run_id" -n cli -D "$dir/.download" + +# Extract the archive for this platform. +archive=$(cli_snapshot_archive) +if [ ! -f "$dir/.download/$archive" ]; then + echo "Archive not found: $archive" + echo "Available archives:" + ls "$dir/.download/" + exit 1 +fi + +mkdir -p "$dir/.bin" +tar -xzf "$dir/.download/$archive" -C "$dir/.bin" + +# Make CLI available on $PATH. +chmod +x "$dir/.bin/databricks" +export PATH="$dir/.bin:$PATH" + +# Set the prompt to indicate the bugbash environment and exec. +export PS1="(bugbash $BRANCH) \[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ " + +# Display completion instructions. +echo "" +echo "==================================================================" + +if [[ ${BASH_VERSINFO[0]} -lt 5 ]]; then + echo -en "\033[31m" + echo "You have Bash version < 5 installed... completion won't work." + echo -en "\033[0m" + echo "" + echo "Install it with:" + echo "" + echo " brew install bash bash-completion" + echo "" + echo "==================================================================" +fi + +echo "" +echo "To load completions in your current shell session:" +echo "" +echo " source /opt/homebrew/etc/profile.d/bash_completion.sh" +echo " source <(databricks completion bash)" +echo "" +echo "==================================================================" +echo "" + +# Exec into a new shell. +# Note: don't use zsh because on macOS it _always_ overwrites PS1. +exec /usr/bin/env bash From 50915f7fa3995282012ce642313e542529f991dc Mon Sep 17 00:00:00 2001 From: Akshay Singla Date: Sat, 6 Jun 2026 07:31:07 +0000 Subject: [PATCH 3/3] lakebox: tighten comments Pass over every Go file in cmd/lakebox/ to bring comment density in line with the repo style guide (comments explain WHY, not WHAT; default to none). Net 152 lines removed. What got cut: - Pure noise that restated obvious code ("Use the dedicated lakebox SSH key.", "Check that ssh-keygen is available before trying to generate.", "Slice off the OpenSSH comment by stopping at the second space.", etc.). - Multi-paragraph essays where one sentence was load-bearing. - Duplicated explanations across function-doc and call-site, or across sibling functions (stop/start/registerKey, sandboxEntry/ updateBody). - Code-history narration ("used to render a hardcoded 10m...", "until this helper existed lakebox commands silently..."). - Signature restatements at the end of function docs. What was preserved (the exemplars worth keeping): - sandboxPath security context. - idleTimeoutSecs Duration proto3 form. - listPage SDK slot-6 quirk. - verifyKeyRegistered SSH protocol limitation. - buildSSHArgs ssh-arg-joining trap. - removeSandbox orphan-gateway-host cleanup. - buildSSHConfigBlock UI alignment + first-match-wins ordering. - field SGR-before-padding invariant. Added missing function-doc comments to New, loadState, saveState, stateFilePath for consistency with the rest of the package. No behavior change. Co-authored-by: Isaac --- cmd/lakebox/api.go | 140 ++++++++++++++------------------------ cmd/lakebox/completion.go | 26 ++----- cmd/lakebox/config.go | 10 +-- cmd/lakebox/delete.go | 6 +- cmd/lakebox/keyhash.go | 6 -- cmd/lakebox/lakebox.go | 1 + cmd/lakebox/list.go | 28 ++------ cmd/lakebox/register.go | 24 ++----- cmd/lakebox/resolve.go | 2 - cmd/lakebox/ssh.go | 59 ++++------------ cmd/lakebox/sshconfig.go | 37 +++------- cmd/lakebox/sshkey.go | 3 +- cmd/lakebox/start.go | 12 ++-- cmd/lakebox/state.go | 4 ++ cmd/lakebox/ui.go | 16 ++--- 15 files changed, 111 insertions(+), 263 deletions(-) diff --git a/cmd/lakebox/api.go b/cmd/lakebox/api.go index 7ac1e296c98..b231a2d683c 100644 --- a/cmd/lakebox/api.go +++ b/cmd/lakebox/api.go @@ -15,39 +15,31 @@ import ( // sandboxPath returns the URL path for a single sandbox resource. The ID is // path-escaped so a value like `foo;rm -rf /` lands on -// `/sandboxes/foo%3Brm%20-rf%20%2F` and gets a clean 400 from -// validate_sandbox_id on the server, rather than its unescaped `/` re-routing -// the request to the list endpoint (which silently returns an empty result the -// CLI then renders as an all-zero sandbox record). +// `/sandboxes/foo%3Brm%20-rf%20%2F` and gets a clean 400 from the server, +// rather than its unescaped `/` re-routing the request to the list endpoint +// (which silently returns an empty result the CLI then renders as an +// all-zero sandbox record). func sandboxPath(id string) string { return lakeboxAPIPath + "/" + url.PathEscape(id) } -// Sandboxes live under the `/sandboxes` sub-collection of the lakebox -// service namespace. -const lakeboxAPIPath = "/api/2.0/lakebox/sandboxes" - -// SSH keys are nested under the lakebox service namespace alongside -// `sandboxes/`. -const lakeboxKeysAPIPath = "/api/2.0/lakebox/ssh-keys" +// Sub-collections under the lakebox service namespace. +const ( + lakeboxAPIPath = "/api/2.0/lakebox/sandboxes" + lakeboxKeysAPIPath = "/api/2.0/lakebox/ssh-keys" +) -// orgIDHeader is sent by multi-workspace gateways so the gateway can -// scope the credential to a specific workspace. Without it, requests -// fail with "Credential was not sent or was of an unsupported type for -// this API." +// orgIDHeader scopes the credential to a workspace on multi-workspace +// gateways. Without it, requests fail with "Credential was not sent or was +// of an unsupported type for this API." const orgIDHeader = "X-Databricks-Org-Id" -// maxNameBytes mirrors the manager-side `Sandbox.name` length cap. The -// server measures bytes, not characters, so a name made of emoji or other -// multi-byte UTF-8 hits the limit in fewer visible characters than the user -// expects. Mirroring the constant client-side lets us fail fast with the -// observed byte count instead of paying a round-trip for a 400. +// maxNameBytes mirrors the server-side `Sandbox.name` cap. The server +// measures bytes (not runes), so emoji hit the limit faster than expected; +// mirroring it client-side lets us fail fast with the observed byte count. const maxNameBytes = 256 -// validateName returns an error when `name` exceeds the wire limit. The -// error names the observed byte count so the user can recover in one shot -// (the original server message just said "exceeds 256 bytes" without saying -// by how much, which is unhelpful when emoji are in play). +// validateName rejects names that exceed the wire limit (counted in bytes). func validateName(name string) error { if n := len(name); n > maxNameBytes { return fmt.Errorf("--name is %d bytes; limit is %d (emoji and most non-ASCII characters count as 2-4 bytes each)", n, maxNameBytes) @@ -61,26 +53,20 @@ type lakeboxAPI struct { } // sandboxCreateBody is the inner `Sandbox` message in the create payload. -// Only `name` is caller-settable today; all other fields are server-chosen. +// Only `name` is caller-settable; the rest are server-chosen. type sandboxCreateBody struct { Name string `json:"name,omitempty"` } -// createRequest is the JSON body for POST /api/2.0/lakebox/sandboxes. -// `CreateSandboxRequest { Sandbox sandbox = 1 }` has `body: "*"`, so the -// wire body is the full request with a `sandbox` wrapper. +// createRequest is the wrapped POST body for sandbox creation. type createRequest struct { Sandbox sandboxCreateBody `json:"sandbox"` } -// createResponse is the JSON body returned by POST /api/2.0/lakebox/sandboxes. -// Mirrors the `Sandbox` proto message after JSON transcoding. -// -// `FQDN` is the manager's internal routing hostname — not user-actionable. -// `GatewayHost` is the public SSH gateway hostname for the workspace, -// stamped by the manager so the CLI no longer needs to hardcode regional -// defaults. Both are `omitempty` so old/new wire shapes round-trip -// cleanly. +// createResponse mirrors the Sandbox proto after JSON transcoding. FQDN is +// the manager's internal routing host (not user-actionable); GatewayHost is +// the public SSH gateway. Both are `omitempty` so old and new server +// versions round-trip cleanly. type createResponse struct { SandboxID string `json:"sandboxId"` Status string `json:"status"` @@ -88,16 +74,11 @@ type createResponse struct { GatewayHost string `json:"gatewayHost,omitempty"` } -// sandboxEntry is a single item in the list response. -// Mirrors the `Sandbox` proto message after JSON transcoding. -// -// IdleTimeout and NoAutostop correspond to the proto's `optional` fields; -// they're pointers so we can tell "field absent on the wire" (server has -// the global default) from "explicitly set to 0 / false." -// -// `IdleTimeout` is a `google.protobuf.Duration`. Proto3 JSON canonical -// form serializes Duration as a string with an `s` suffix (e.g. -// `"900s"`), so the Go field is `*string` and we parse on read. +// sandboxEntry mirrors the Sandbox proto after JSON transcoding. +// IdleTimeout and NoAutostop are pointer-typed so we can distinguish +// "field absent on the wire" (server uses its default) from "explicitly +// set to 0 / false". IdleTimeout is a proto3-canonical Duration string +// (see idleTimeoutSecs). type sandboxEntry struct { SandboxID string `json:"sandboxId"` Status string `json:"status"` @@ -129,17 +110,12 @@ func (e *sandboxEntry) idleTimeoutSecs() int64 { return int64(d.Seconds()) } -// autoStopLabel renders the auto-stop policy advertised by the manager -// for one sandbox into a short human-readable string. Wire semantics: +// autoStopLabel renders the auto-stop policy for one sandbox: // - `no_autostop == true` → never auto-stops // - `idle_timeout` set and positive → that many seconds // - otherwise → no enforcement today; render as "never" // -// The "otherwise" branch used to render a hardcoded `10m` claiming to -// mirror a server-side idle-grace fallback. That fallback is not -// currently enforced, so unset `idle_timeout` is functionally "never -// auto-stops" today. Once the server enforces a real default, swap -// this branch back to a duration label. +// If the manager later enforces an idle-grace default, render it here. func (e *sandboxEntry) autoStopLabel() string { if e.NoAutostop != nil && *e.NoAutostop { return "never" @@ -152,7 +128,7 @@ func (e *sandboxEntry) autoStopLabel() string { // formatDurationSecs prints `secs` as a compact duration (e.g. `90s`, // `15m`, `2h`, `1h30m`). Falls back to seconds if it's not a clean -// minute/hour multiple. Avoids pulling in a dependency just for this. +// minute/hour multiple. func formatDurationSecs(secs int64) string { if secs < 60 { return fmt.Sprintf("%ds", secs) @@ -170,26 +146,17 @@ func formatDurationSecs(secs int64) string { } // listResponse is the JSON body returned by GET /api/2.0/lakebox/sandboxes. -// `nextPageToken` is empty on the final page (or when the result fits in one). type listResponse struct { Sandboxes []sandboxEntry `json:"sandboxes"` NextPageToken string `json:"nextPageToken,omitempty"` } -// listPageSize matches the manager-side default. Typical user fleets are -// well under this, so one round-trip covers them; the pagination loop in -// `list` handles the rare larger fleet. +// listPageSize matches the manager-side default. const listPageSize = 100 -// updateBody is the PATCH request body. The server takes the inner -// `Sandbox` message as the request body directly — there is no -// `{"sandbox": {...}}` wrapping on the wire. -// -// Pointer fields encode the proto3 `optional` semantics — only the -// fields we explicitly set are emitted, leaving everything else -// server-untouched. `IdleTimeout` is a proto3-canonical Duration -// string (e.g. `"900s"`); the server-side wire type is -// `google.protobuf.Duration`. +// updateBody is the PATCH body; the server takes the inner `Sandbox` +// message directly with no `{"sandbox": ...}` wrapping. Pointer fields +// encode proto3 optional semantics (see sandboxEntry). type updateBody struct { SandboxID string `json:"sandbox_id"` Name *string `json:"name,omitempty"` @@ -203,6 +170,7 @@ type registerKeyRequest struct { Name string `json:"name,omitempty"` } +// newLakeboxAPI returns a lakeboxAPI bound to the workspace client's config. func newLakeboxAPI(w *databricks.WorkspaceClient) (*lakeboxAPI, error) { c, err := client.New(w.Config) if err != nil { @@ -212,10 +180,9 @@ func newLakeboxAPI(w *databricks.WorkspaceClient) (*lakeboxAPI, error) { } // headers attaches the workspace routing identifier so multi-workspace -// gateways (e.g. SPOG hosts) can scope the credential. Mirrors the pattern -// in libs/telemetry, libs/filer, and SDK-generated workspace services. The -// auth.WorkspaceIDNone sentinel ("none") is treated as unset so the literal -// string never goes on the wire. +// gateways (e.g. SPOG hosts) can scope the credential. The +// auth.WorkspaceIDNone sentinel ("none") is treated as unset so the +// literal string never goes on the wire. func (a *lakeboxAPI) headers() map[string]string { wsID := a.c.Config.WorkspaceID if wsID == "" || wsID == auth.WorkspaceIDNone { @@ -225,8 +192,7 @@ func (a *lakeboxAPI) headers() map[string]string { } // create calls POST /api/2.0/lakebox/sandboxes. An empty `name` is omitted -// from the wire payload so the server treats it as "unset" rather than -// "explicit empty string." +// so the server treats it as "unset" rather than "explicit empty string". func (a *lakeboxAPI) create(ctx context.Context, name string) (*createResponse, error) { body := createRequest{Sandbox: sandboxCreateBody{Name: name}} var resp createResponse @@ -238,7 +204,7 @@ func (a *lakeboxAPI) create(ctx context.Context, name string) (*createResponse, } // list calls GET /api/2.0/lakebox/sandboxes, following pagination until the -// server stops sending `next_page_token`. Returns the full set in one slice. +// server stops sending `next_page_token`. func (a *lakeboxAPI) list(ctx context.Context) ([]sandboxEntry, error) { var all []sandboxEntry pageToken := "" @@ -255,8 +221,7 @@ func (a *lakeboxAPI) list(ctx context.Context) ([]sandboxEntry, error) { } } -// listPage fetches a single page of sandboxes. An empty `pageToken` requests -// the first page; the server enforces ordering across pages. +// listPage fetches a single page of sandboxes. // // `query` is passed in slot 6 (`request`), not slot 5 (`queryParams`). On // GET, the SDK's makeRequestBody serializes `request` into the URL query @@ -288,9 +253,9 @@ func (a *lakeboxAPI) get(ctx context.Context, id string) (*sandboxEntry, error) } // update calls PATCH /api/2.0/lakebox/sandboxes/{id} with whichever of -// `idle_timeout` / `no_autostop` the caller chose to set. Fields left -// nil are omitted from the wire payload, so the server preserves their -// current values. Returns the refreshed `sandboxEntry`. +// `idle_timeout` / `no_autostop` the caller chose to set. Fields left nil +// are omitted from the wire payload, so the server preserves their current +// values. Returns the refreshed `sandboxEntry`. func (a *lakeboxAPI) update(ctx context.Context, id string, name *string, idleTimeoutSecs *int64, noAutostop *bool) (*sandboxEntry, error) { var idleTimeout *string if idleTimeoutSecs != nil { @@ -317,9 +282,7 @@ func (a *lakeboxAPI) delete(ctx context.Context, id string) error { } // stop calls POST /api/2.0/lakebox/sandboxes/{id}/stop and returns the -// refreshed sandbox. The proto's `StopSandboxRequest` carries `sandbox_id` -// (redundant with the URL path) under `body: "*"`, so we mirror it -// explicitly even though the transcoder fills the field from the path. +// refreshed sandbox. func (a *lakeboxAPI) stop(ctx context.Context, id string) (*sandboxEntry, error) { body := map[string]string{"sandbox_id": id} var resp sandboxEntry @@ -331,7 +294,7 @@ func (a *lakeboxAPI) stop(ctx context.Context, id string) (*sandboxEntry, error) } // start calls POST /api/2.0/lakebox/sandboxes/{id}/start and returns the -// refreshed sandbox. Mirror of `stop`; same body shape per `body: "*"`. +// refreshed sandbox. func (a *lakeboxAPI) start(ctx context.Context, id string) (*sandboxEntry, error) { body := map[string]string{"sandbox_id": id} var resp sandboxEntry @@ -343,15 +306,12 @@ func (a *lakeboxAPI) start(ctx context.Context, id string) (*sandboxEntry, error } // registerKey calls POST /api/2.0/lakebox/ssh-keys. An empty `name` is -// omitted from the wire payload so the server records "unset" rather than -// an explicit empty string. +// omitted so the server records "unset" rather than an explicit empty string. func (a *lakeboxAPI) registerKey(ctx context.Context, publicKey, name string) error { return a.c.Do(ctx, http.MethodPost, lakeboxKeysAPIPath, a.headers(), nil, registerKeyRequest{PublicKey: publicKey, Name: name}, nil) } -// sshKeyEntry is a single item in the ssh-key list response. Mirrors the -// `SshKey` proto message after JSON transcoding (`key_hash` → `keyHash`, -// timestamps as RFC 3339 strings). +// sshKeyEntry is a single item in the ssh-key list response. type sshKeyEntry struct { KeyHash string `json:"keyHash"` Name string `json:"name,omitempty"` @@ -360,8 +320,8 @@ type sshKeyEntry struct { } // listKeysResponse is the JSON body returned by GET /api/2.0/lakebox/ssh-keys. -// Per-user keys are hard-capped at 100 server-side, so the full set fits in -// one response — no pagination. +// Per-user keys are hard-capped server-side, so the full set fits in one +// response — no pagination. type listKeysResponse struct { SshKeys []sshKeyEntry `json:"sshKeys"` } diff --git a/cmd/lakebox/completion.go b/cmd/lakebox/completion.go index f05cc621884..29e9c074206 100644 --- a/cmd/lakebox/completion.go +++ b/cmd/lakebox/completion.go @@ -6,19 +6,9 @@ import ( "github.com/spf13/cobra" ) -// completeSandboxIDs is a Cobra ValidArgsFunction returning the caller's -// sandbox IDs (and display names, when distinct from the ID) for tab -// completion. Reads purely from the local cache populated by `lakebox -// list` / `create` / `status` / etc. — no API call on ``, so a flaky -// network or unrefreshed auth token never makes the shell hang. -// -// Cobra runs ValidArgsFunction in a separate process from the main -// command, so we still need to bootstrap the workspace client just to -// learn which profile we're under. The cache itself is profile-scoped. -// -// Best-effort: any failure (no profile resolvable, empty cache) returns -// no suggestions instead of an error so the shell stays usable and the -// user can still type the ID by hand. +// completeSandboxIDs returns sandbox IDs and (distinct) display names +// from the local cache for tab completion. Cache-only so an unrefreshed +// token never hangs the shell; any failure yields no suggestions. func completeSandboxIDs(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { if len(args) > 0 { return nil, cobra.ShellCompDirectiveNoFileComp @@ -31,9 +21,7 @@ func completeSandboxIDs(cmd *cobra.Command, args []string, toComplete string) ([ if len(sbs) == 0 { return nil, cobra.ShellCompDirectiveNoFileComp } - // Offer the ID always, plus the display name when the user actually - // set one (server defaults `name` to the sandbox ID, so don't echo - // the same string twice). + // Server defaults `name` to the ID, so only emit the name when it's distinct. suggestions := make([]string, 0, len(sbs)*2) for _, s := range sbs { suggestions = append(suggestions, s.ID) @@ -44,10 +32,8 @@ func completeSandboxIDs(cmd *cobra.Command, args []string, toComplete string) ([ return suggestions, cobra.ShellCompDirectiveNoFileComp } -// completeSSHKeyHashes is the equivalent for `ssh-key delete `, -// returning the hashes of registered keys. SSH-key hashes aren't cached -// locally (per-user cap is ~100 server-side and listing is cheap), so -// this path still calls the API. +// completeSSHKeyHashes returns registered key hashes for `ssh-key delete`. +// Hashes aren't cached locally, so this path calls the API. func completeSSHKeyHashes(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { if len(args) > 0 { return nil, cobra.ShellCompDirectiveNoFileComp diff --git a/cmd/lakebox/config.go b/cmd/lakebox/config.go index de920ec9bb7..efe0d51220b 100644 --- a/cmd/lakebox/config.go +++ b/cmd/lakebox/config.go @@ -78,8 +78,6 @@ Examples: return err } - // Translate flag presence + value into the proto3 - // optional-field semantics the server expects. var idleSecs *int64 if cmd.Flags().Changed("idle-timeout") { secs, err := parseIdleTimeoutFlag(idleTimeoutFlag) @@ -157,11 +155,9 @@ func checkIdleSecs(secs int64) (int64, error) { return 0, nil // clear / revert to global default } if secs < minIdleTimeoutSecs || secs > maxIdleTimeoutSecs { - // Format both the bounds and the offending value as Go-style - // durations to match the input form the user typed and the - // flag's --help text — echoing the bounds back in raw seconds - // (`86400s` / `90000s`) reads as a different unit and confuses - // the user about whether they typed the right thing. + // Echo bounds in Go duration form so they match the input the + // user typed (raw seconds like `86400s` reads as a different + // unit). return 0, fmt.Errorf( "idle-timeout must be 0 (clear) or between %s and %s, got %s", formatDurationSecs(minIdleTimeoutSecs), diff --git a/cmd/lakebox/delete.go b/cmd/lakebox/delete.go index 809dbda5fb2..f43c8eb7357 100644 --- a/cmd/lakebox/delete.go +++ b/cmd/lakebox/delete.go @@ -47,10 +47,8 @@ Examples: return err } - // Validate existence first so `delete ` fails clearly - // instead of returning a confident "✓ Removed" on a sandbox - // the server never had — the DELETE endpoint treats 404 as - // idempotent success on the wire. + // DELETE returns success on 404, so pre-check existence to + // surface typos clearly. entry, err := api.get(ctx, lakeboxID) if err != nil { if errors.Is(err, apierr.ErrNotFound) { diff --git a/cmd/lakebox/keyhash.go b/cmd/lakebox/keyhash.go index 7f4fcd0bd45..823eb1e1682 100644 --- a/cmd/lakebox/keyhash.go +++ b/cmd/lakebox/keyhash.go @@ -10,13 +10,7 @@ import ( // the first 16 bytes and hex-encoded; the OpenSSH comment (anything after // the second whitespace-separated token) is stripped before hashing, so // registering the same key under different comments yields the same hash. -// Inputs that don't have a second token are hashed as-is. -// -// Useful for matching a locally-known key against entries in a -// GET /ssh-keys listing without sending the key contents back to the -// server. func keyHash(publicKey string) string { - // Slice off the OpenSSH comment by stopping at the second space. end := len(publicKey) spaces := 0 for i, c := range publicKey { diff --git a/cmd/lakebox/lakebox.go b/cmd/lakebox/lakebox.go index e406fccbbd1..382d6865d45 100644 --- a/cmd/lakebox/lakebox.go +++ b/cmd/lakebox/lakebox.go @@ -4,6 +4,7 @@ import ( "github.com/spf13/cobra" ) +// New returns the root command for the lakebox subcommand. func New() *cobra.Command { cmd := &cobra.Command{ Use: "lakebox", diff --git a/cmd/lakebox/list.go b/cmd/lakebox/list.go index ecb42b4e044..6e415241e95 100644 --- a/cmd/lakebox/list.go +++ b/cmd/lakebox/list.go @@ -96,23 +96,11 @@ Example: out := cmd.OutOrStdout() - // Compute column widths. AUTOSTOP holds short tokens like - // `never`, `15m`, `1h30m` — 8 chars covers them. - // - // NAME is *always* rendered, even when no sandbox has a - // custom --name set, so the table shape stays stable across - // calls. Auto-hiding the column means it appears the moment - // you set --name on any one box and vanishes when you clear - // them all — that breaks scripts that parse the output and - // flickers under the user's eyes between calls. Sandboxes - // without a custom name render as `-` in the NAME cell. - // - // All column widths are measured in *terminal cells*, not - // bytes or runes — emoji and CJK glyphs render as 2 cells - // despite being 1 rune / multi-byte, and using len() here - // (which counts bytes) misaligns the row whenever a `--name` - // includes wide characters. cmdio.Width gives the - // East-Asian-Width-corrected cell count. + // NAME is always rendered (even with no --name set on any + // row) so the table shape stays stable across calls; + // unnamed sandboxes render as `-`. Widths are measured in + // terminal cells via cmdio.Width so emoji / CJK names line + // up correctly. idCol := 10 autostopCol := 8 nameCol := 4 @@ -123,10 +111,8 @@ Example: if l := cmdio.Width(e.autoStopLabel()); l > autostopCol { autostopCol = l } - // Only let an actual custom name expand the column. A - // sandbox whose `name` happens to equal its `id` would - // otherwise drive the column to the ID's width — for no - // gain, since that row renders as `-`. + // A name equal to the id renders as `-`, so don't let + // it expand the column. if e.Name != "" && e.Name != e.SandboxID { if l := cmdio.Width(e.Name); l > nameCol { nameCol = l diff --git a/cmd/lakebox/register.go b/cmd/lakebox/register.go index 7bc083f149a..ca19a91865b 100644 --- a/cmd/lakebox/register.go +++ b/cmd/lakebox/register.go @@ -66,10 +66,7 @@ Examples: return fmt.Errorf("failed to read public key %s.pub: %w", keyPath, err) } - // Default the registered key's label to this machine's hostname so - // `lakebox ssh-key list` is meaningful when the user has keys from - // multiple machines. Failed hostname lookups fall through to the - // server's "unset" default rather than blocking registration. + // Default to hostname so `ssh-key list` is meaningful across machines. if name == "" { if host, err := os.Hostname(); err == nil { name = host @@ -84,19 +81,9 @@ Examples: } s.ok("SSH key registered") - // Write the shared `lakebox-gw` SSH-config alias so editor - // Remote-SSH ("Open in VS Code/Cursor") deep links and - // `ssh @lakebox-gw` from a plain shell both work - // without the user having to paste any config block. The - // alias name and shape are aligned with the workspace UI's - // "First time setup?" disclosure, so CLI users and - // pasted-snippet users converge on the same config. - // - // First register on this machine prompts for consent (the - // Include line we add to ~/.ssh/config is a permanent - // change to a user-managed file). Re-runs are silent — if - // the Include is already there, the user has opted in and - // we just refresh the managed file's contents. + // Write the shared `lakebox-gw` ~/.ssh/config alias so editor + // Remote-SSH and plain `ssh @lakebox-gw` both work without + // the user pasting any config block (see maybeWriteSSHConfig). if err := maybeWriteSSHConfig(ctx, keyPath, w.Config.Host); err != nil { warn(ctx, fmt.Sprintf("registered key, but failed to update ~/.ssh/config: %v", err)) } @@ -124,7 +111,7 @@ func lakeboxKeyPath(ctx context.Context) (string, error) { } // ensureLakeboxKey returns the path to the lakebox SSH key, generating it if -// it doesn't exist. Returns (path, wasGenerated, error). +// it doesn't exist. func ensureLakeboxKey(ctx context.Context) (string, bool, error) { keyPath, err := lakeboxKeyPath(ctx) if err != nil { @@ -135,7 +122,6 @@ func ensureLakeboxKey(ctx context.Context) (string, bool, error) { return keyPath, false, nil } - // Check that ssh-keygen is available before trying to generate. if _, err := exec.LookPath("ssh-keygen"); err != nil { return "", false, errors.New( "ssh-keygen not found in PATH.\n" + diff --git a/cmd/lakebox/resolve.go b/cmd/lakebox/resolve.go index 42da7503adb..d2e6afbd7aa 100644 --- a/cmd/lakebox/resolve.go +++ b/cmd/lakebox/resolve.go @@ -32,8 +32,6 @@ func resolveLocalID(ctx context.Context, profile, arg string) (string, error) { sbs := getSandboxes(ctx, profile) - // ID-first so that a sandbox whose --name happens to collide with - // another sandbox's ID never gets mistakenly resolved by name. for _, s := range sbs { if s.ID == arg { return arg, nil diff --git a/cmd/lakebox/ssh.go b/cmd/lakebox/ssh.go index c8af2057b24..73715bd2040 100644 --- a/cmd/lakebox/ssh.go +++ b/cmd/lakebox/ssh.go @@ -64,7 +64,6 @@ Examples: profile = w.Config.Host } - // Use the dedicated lakebox SSH key. keyPath, err := lakeboxKeyPath(ctx) if err != nil { return fmt.Errorf("failed to determine lakebox key path: %w", err) @@ -73,8 +72,6 @@ Examples: return fmt.Errorf("lakebox SSH key not found at %s — run 'databricks lakebox register' first", keyPath) } - // Parse args: everything before -- is the optional lakebox ID, - // everything after -- is passed through to ssh. var lakeboxID string var extraArgs []string @@ -90,8 +87,6 @@ Examples: extraArgs = args[dashAt:] } - // Resolve a user-typed name to its ID using the local cache. - // No-op when arg is already an ID or not in cache. if lakeboxID != "" { resolved, err := resolveLocalID(ctx, profile, lakeboxID) if err != nil { @@ -100,35 +95,19 @@ Examples: lakeboxID = resolved } - // sandboxGatewayHost captures the gateway hostname from any - // Sandbox response we touch in this command, so the resolution - // below can prefer it over the cached value. Stays "" when we - // never hit the API in this invocation (e.g. explicit-id ssh - // with a warm cache). - var sandboxGatewayHost string - - // sandboxStatus is the server-side state observed in this - // invocation, used to print an explicit "starting from stopped" - // notice before the connect spinner. Empty when we never hit - // the API. - var sandboxStatus string + // Captured from any Sandbox response we touch below; "" when + // we never hit the API in this invocation. + var ( + sandboxGatewayHost string + sandboxStatus string + ) api, err := newLakeboxAPI(w) if err != nil { return err } - // Verify the local key is still registered before opening - // the SSH socket. The gateway already checks the key - // during userauth, but SSH_MSG_USERAUTH_FAILURE has no - // free-form reason field and SSH_MSG_USERAUTH_BANNER is - // swallowed by many client wrappers — so the gateway can't - // communicate "key not registered" through the SSH channel. - // The HTTP API is the only out-of-band wire that can - // surface a structured "run register" pointer to the user. - // listKeys errors fall through with a warning so a - // transient API hiccup doesn't block a connection the - // gateway could still route. + // Surface deleted-key errors before ssh's opaque "Permission denied". if err := verifyKeyRegistered(ctx, api, keyPath); err != nil { return err } @@ -176,13 +155,8 @@ Examples: } } - // Explicitly start (and wait for) the sandbox if it isn't - // already Running. The gateway will auto-start a stopped - // sandbox on connect, but that path is opaque (ssh just - // hangs for minutes with no progress) and races the - // cold-start timeout. Driving the start ourselves gives - // the user a visible spinner with elapsed time and a - // deterministic timeout (see start.go). + // Drive start ourselves so the user sees a spinner instead + // of an opaque multi-minute hang inside ssh's connect path. if sandboxStatus != "" && !strings.EqualFold(sandboxStatus, "running") { final, err := ensureRunning(ctx, api, lakeboxID, sandboxStatus) if err != nil { @@ -212,12 +186,9 @@ Examples: _ = setGatewayHost(ctx, profile, sandboxGatewayHost) } - // Spinner runs until exec replaces this process (Linux/macOS) - // or until the ssh subprocess returns (Windows execv shim). - // We deliberately don't print "Connected" — at this point ssh - // hasn't actually handshaken yet, so any success affirmation - // here would be a lie that gets contradicted by ssh's own - // error output on the failure path. + // Don't print "Connected" here — ssh hasn't completed the + // handshake yet, so any success message would race ssh's + // own error output on the failure path. s := spin(ctx, "Connecting to "+cmdio.Bold(ctx, lakeboxID)+"…") defer s.Close() return execSSHDirect(lakeboxID, host, gatewayPort, keyPath, extraArgs) @@ -261,10 +232,8 @@ func verifyKeyRegistered(ctx context.Context, api *lakeboxAPI, keyPath string) e return fmt.Errorf("your lakebox SSH key (%s) is not registered with this workspace — run `databricks lakebox register` to re-register it", want) } -// ensureRunning brings the named sandbox to Running before ssh hands -// off. Owns its own spinner lifecycle — caller must not have one open. -// Calls api.start when the sandbox is currently Stopped; falls through -// to a poll for already-transitioning states (Creating, Starting). +// ensureRunning brings the named sandbox to Running with its own +// spinner — caller must not have one open. func ensureRunning(ctx context.Context, api *lakeboxAPI, id, currentStatus string) (*sandboxEntry, error) { s := spin(ctx, "Starting "+cmdio.Bold(ctx, id)+"…") defer s.Close() diff --git a/cmd/lakebox/sshconfig.go b/cmd/lakebox/sshconfig.go index 9e57dc49b09..24b04d158fc 100644 --- a/cmd/lakebox/sshconfig.go +++ b/cmd/lakebox/sshconfig.go @@ -33,8 +33,7 @@ const ( ) // sshConfigPaths returns (managedFile, mainConfig) under the user's -// ~/.ssh directory. Side-effect free; safe to call before deciding -// whether to actually write anything. +// ~/.ssh directory. func sshConfigPaths(ctx context.Context) (string, string, error) { home, err := env.UserHomeDir(ctx) if err != nil { @@ -45,9 +44,7 @@ func sshConfigPaths(ctx context.Context) (string, string, error) { } // sshConfigAlreadyManaged reports whether ~/.ssh/config already -// contains the lakebox-managed Include block. Used by `register` to -// decide whether to prompt the user for consent (first time) or -// silently refresh the managed file (already opted in). +// contains the lakebox-managed Include block. func sshConfigAlreadyManaged(ctx context.Context) (bool, error) { _, mainPath, err := sshConfigPaths(ctx) if err != nil { @@ -66,8 +63,6 @@ func sshConfigAlreadyManaged(ctx context.Context) (bool, error) { // writeSSHConfig writes the lakebox-managed SSH config block to a // managed file and, if not already present, adds an Include directive // to the user's ~/.ssh/config pointing at that file. -// -// Returns (managedFilePath, mainConfigPath, error). func writeSSHConfig(ctx context.Context, keyPath, gatewayHost, gatewayPort string) (string, string, error) { home, err := env.UserHomeDir(ctx) if err != nil { @@ -114,11 +109,8 @@ Host %s `, sshConfigAlias, gatewayHost, gatewayPort, keyPath) } -// writeManagedConfig writes content to path with 0600 perms, atomically -// via tmp + rename so a crash mid-write can't leave a half-written -// file. Skips the write entirely when the existing content already -// matches, so repeated `lakebox register` runs don't churn the file's -// mtime. +// writeManagedConfig writes content to path atomically with 0600 +// perms. No-op when the file already matches, to avoid churning mtime. func writeManagedConfig(path, content string) error { if existing, err := os.ReadFile(path); err == nil && bytes.Equal(existing, []byte(content)) { return nil @@ -154,16 +146,12 @@ func ensureMainIncludesManaged(mainPath, managedPath string) error { return fmt.Errorf("reading %s: %w", mainPath, err) } - // Prepend our block. SSH processes the file top-down and uses the - // first value seen for each option; placing our Include first lets - // `lakebox-gw` always resolve to the managed values even if the - // user has a wildcard `Host *` block later. + // Prepend so our Include wins SSH's first-match-per-option + // semantics over any wildcard `Host *` block later in the file. var buf bytes.Buffer buf.WriteString(managedBlock) if len(existing) > 0 { - // Ensure visual separation between our block and the user's - // content. If the existing file already starts with a blank - // line, don't add another. + // Avoid double blank lines if the file already starts with one. if !strings.HasPrefix(string(existing), "\n") { buf.WriteString("\n") } @@ -194,15 +182,8 @@ func hasOurMarkedBlock(text string) bool { } // maybeWriteSSHConfig writes the lakebox-managed SSH config, prompting -// for consent the first time on this machine. Re-runs are silent: the -// Include line in ~/.ssh/config signals prior opt-in, and we just -// refresh the managed file's contents (e.g. if the gateway host has -// changed). -// -// In non-interactive contexts (no TTY, no `--yes`-style flag here), -// we skip the write rather than fail — `lakebox ssh` still works via -// argv-explicit flags, only IDE Remote-SSH from the workspace UI -// needs the config alias. +// for consent the first time on this machine. Re-runs silently refresh +// the managed file. Non-interactive contexts skip the write entirely. func maybeWriteSSHConfig(ctx context.Context, keyPath, workspaceHost string) error { already, err := sshConfigAlreadyManaged(ctx) if err != nil { diff --git a/cmd/lakebox/sshkey.go b/cmd/lakebox/sshkey.go index 4539441ce2a..3ad634f282e 100644 --- a/cmd/lakebox/sshkey.go +++ b/cmd/lakebox/sshkey.go @@ -127,8 +127,7 @@ Examples: const hashCol = 32 const timeCol = 20 - // Leading 4-char gutter reserves space for a per-row `*` marker on - // the key matching this machine; header and separator leave it blank. + // 4-char gutter holds a per-row `*` for the local key (blank in header). header := fmt.Sprintf("%-*s %-*s %-*s %s", nameCol, "NAME", hashCol, "KEY HASH", timeCol, "CREATED", "LAST USED") fmt.Fprintf(out, " %s\n", cmdio.Faint(ctx, header)) diff --git a/cmd/lakebox/start.go b/cmd/lakebox/start.go index e2ed5025a56..6abb6cd4861 100644 --- a/cmd/lakebox/start.go +++ b/cmd/lakebox/start.go @@ -12,14 +12,10 @@ import ( "github.com/spf13/cobra" ) -// Bounds for `start`'s "wait until Running" loop. The server's StartSandbox -// RPC returns immediately with status="Creating" (reused for cold start), -// so the CLI polls until it actually reaches Running. Matches `create`'s -// blocking semantics so scripts can chain start → ssh / start → config -// without racing the cold boot. The 10-minute timeout covers the observed -// cold-start range (5–13 minutes) for the common case; truly stuck -// sandboxes still surface as a timeout rather than hanging the script -// forever. +// Bounds for `start`'s "wait until Running" poll. StartSandbox returns +// immediately with status="Creating", so we poll until it actually +// reaches Running. 10 min covers the observed cold-start range +// (5–13 min); stuck sandboxes surface as a timeout, not a hang. const ( startPollInterval = 2 * time.Second startWaitTimeout = 10 * time.Minute diff --git a/cmd/lakebox/state.go b/cmd/lakebox/state.go index a943769923b..e0a2d2505f5 100644 --- a/cmd/lakebox/state.go +++ b/cmd/lakebox/state.go @@ -39,6 +39,7 @@ type cachedSandbox struct { Name string `json:"name,omitempty"` } +// stateFilePath returns the on-disk location of the lakebox state file. func stateFilePath(ctx context.Context) (string, error) { home, err := env.UserHomeDir(ctx) if err != nil { @@ -47,6 +48,8 @@ func stateFilePath(ctx context.Context) (string, error) { return filepath.Join(home, ".databricks", "lakebox.json"), nil } +// loadState reads the lakebox state file, returning an empty state when +// the file doesn't exist yet. func loadState(ctx context.Context) (*stateFile, error) { path, err := stateFilePath(ctx) if err != nil { @@ -71,6 +74,7 @@ func loadState(ctx context.Context) (*stateFile, error) { return &state, nil } +// saveState writes the lakebox state file atomically. func saveState(ctx context.Context, state *stateFile) error { path, err := stateFilePath(ctx) if err != nil { diff --git a/cmd/lakebox/ui.go b/cmd/lakebox/ui.go index 45aa730f79e..203809ec982 100644 --- a/cmd/lakebox/ui.go +++ b/cmd/lakebox/ui.go @@ -11,10 +11,7 @@ import ( ) // jsonOutput reports whether the user asked for JSON output, via either -// the lakebox-local `--json` flag or the framework-global `-o json`. The -// global flag is the standard shape across other `databricks` commands -// and the validator already rejects bogus values, but until this helper -// existed lakebox commands silently ignored it and emitted text anyway. +// the local `--json` flag or the framework-global `-o json`. func jsonOutput(cmd *cobra.Command, jsonFlag bool) bool { if jsonFlag { return true @@ -25,18 +22,15 @@ func jsonOutput(cmd *cobra.Command, jsonFlag bool) bool { return false } -// cmdioSpinner is the subset of *cmdio.spinner's method set we need. -// Defining the interface locally lets us hold the unexported type as a -// struct field; cmdio's spinner satisfies it structurally. +// cmdioSpinner is the subset of *cmdio.spinner's method set we need, +// defined locally so we can hold the unexported type as a field. type cmdioSpinner interface { Update(msg string) Close() } -// spinner wraps cmdio.NewSpinner with ok/fail markers. ok and fail close the -// underlying spinner and log a final ✓/✗ line; Close stops the spinner -// without printing. cmdio's Close is itself idempotent, so a `defer s.Close()` -// is safe alongside an ok/fail call on the success path. +// spinner wraps cmdio.NewSpinner with ok/fail markers. Close (idempotent +// in cmdio) is safe alongside ok/fail, so callers can `defer s.Close()`. type spinner struct { cmdioSpinner ctx context.Context