diff --git a/cmd/cmd.go b/cmd/cmd.go
index 2c1a32caa5..b9603b740a 100644
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -18,6 +18,7 @@ import (
 	"github.com/databricks/cli/cmd/experimental"
 	"github.com/databricks/cli/cmd/fs"
 	"github.com/databricks/cli/cmd/labs"
+	"github.com/databricks/cli/cmd/lakebox"
 	"github.com/databricks/cli/cmd/pipelines"
 	"github.com/databricks/cli/cmd/root"
 	"github.com/databricks/cli/cmd/selftest"
@@ -121,6 +122,7 @@ func New(ctx context.Context) *cobra.Command {
 	cli.AddCommand(configure.New())
 	cli.AddCommand(fs.New())
 	cli.AddCommand(labs.New(ctx))
+	cli.AddCommand(lakebox.New())
 	cli.AddCommand(sync.New())
 	cli.AddCommand(version.New())
 	cli.AddCommand(selftest.New())
diff --git a/cmd/fuzz_panic_test.go b/cmd/fuzz_panic_test.go
index 4fb5d5b9d3..e4037b4ef8 100644
--- a/cmd/fuzz_panic_test.go
+++ b/cmd/fuzz_panic_test.go
@@ -208,6 +208,7 @@ func isAutoGenerated(leaf leafCommand) bool {
 		"configure":    true,
 		"experimental": true,
 		"labs":         true,
+		"lakebox":      true,
 		"pipelines":    true,
 		"psql":         true,
 		"selftest":     true,
diff --git a/cmd/lakebox/api.go b/cmd/lakebox/api.go
new file mode 100644
index 0000000000..3ae086c922
--- /dev/null
+++ b/cmd/lakebox/api.go
@@ -0,0 +1,339 @@
+package lakebox
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/databricks/cli/libs/auth"
+	"github.com/databricks/databricks-sdk-go"
+	"github.com/databricks/databricks-sdk-go/client"
+)
+
+// sandboxPath returns the URL path for a single sandbox resource:
+// lakeboxAPIPath, a "/", then the ID passed through url.PathEscape.
+//
+// The ID is path-escaped so a value like `foo;rm -rf /` lands on
+// `/sandboxes/foo%3Brm%20-rf%20%2F` and gets a clean 400 from the server,
+// rather than its unescaped `/` re-routing the request to the list endpoint
+// (which silently returns an empty result the CLI then renders as an
+// all-zero sandbox record).
+//
+// NOTE(review): an empty id is not rejected here and produces a path that
+// ends in a bare "/". Presumably callers validate the ID before calling —
+// confirm, since that path may hit the same list-endpoint fallback.
+func sandboxPath(id string) string {
+	return lakeboxAPIPath + "/" + url.PathEscape(id)
+}
+
+// Sub-collections under the lakebox service namespace.
+const (
+	lakeboxAPIPath     = "/api/2.0/lakebox/sandboxes"
+	lakeboxKeysAPIPath = "/api/2.0/lakebox/ssh-keys"
+)
+
+// orgIDHeader is the request header that scopes a credential to one
+// workspace on multi-workspace gateways. When it is missing, requests fail
+// with "Credential was not sent or was of an unsupported type for this API."
+const orgIDHeader = "X-Databricks-Org-Id"
+
+// maxNameBytes is the client-side copy of the server's `Sandbox.name` cap.
+// The server counts bytes rather than runes, so emoji reach the limit sooner
+// than a character count suggests; checking locally lets the CLI fail fast
+// and report the observed byte count.
+const maxNameBytes = 256
+
+// validateName returns an error when name is longer than maxNameBytes,
+// measured in bytes. Anything at or under the limit, including the empty
+// string, is accepted.
+func validateName(name string) error {
+	size := len(name) // len on a string counts bytes, not runes
+	if size <= maxNameBytes {
+		return nil
+	}
+	return fmt.Errorf("--name is %d bytes; limit is %d (emoji and most non-ASCII characters count as 2-4 bytes each)", size, maxNameBytes)
+}
+
+// lakeboxAPI is a thin layer over the SDK's DatabricksClient whose requests
+// carry workspace-id-aware headers.
+type lakeboxAPI struct {
+	c *client.DatabricksClient
+}
+
+// sandboxCreateBody is the inner `Sandbox` message of the create payload.
+// `name` is the only caller-settable field; everything else is chosen by
+// the server.
+type sandboxCreateBody struct {
+	Name string `json:"name,omitempty"`
+}
+
+// createRequest is the POST body for sandbox creation: the sandbox message
+// wrapped under a `sandbox` key.
+type createRequest struct {
+	Sandbox sandboxCreateBody `json:"sandbox"`
+}
+
+// createResponse mirrors the Sandbox proto after JSON transcoding.
+// GatewayHost is tagged `omitempty` so old and new server versions
+// round-trip cleanly.
+type createResponse struct {
+	SandboxID   string `json:"sandboxId"`
+	Status      string `json:"status"`
+	GatewayHost string `json:"gatewayHost,omitempty"`
+}
+
+// sandboxEntry mirrors the Sandbox proto after JSON transcoding.
+//
+// IdleTimeout and NoAutostop are pointers so that "field absent on the
+// wire" (the server applies its default) can be told apart from "explicitly
+// set to 0 / false". IdleTimeout holds a proto3-canonical Duration string
+// (see idleTimeoutSecs).
+type sandboxEntry struct {
+	SandboxID     string  `json:"sandboxId"`
+	Status        string  `json:"status"`
+	GatewayHost   string  `json:"gatewayHost,omitempty"`
+	Name          string  `json:"name,omitempty"`
+	CreateTime    string  `json:"createTime,omitempty"`
+	LastStartTime string  `json:"lastStartTime,omitempty"`
+	IdleTimeout   *string `json:"idleTimeout,omitempty"`
+	NoAutostop    *bool   `json:"noAutostop,omitempty"`
+}
+
+// idleTimeoutSecs converts the proto3-canonical Duration string held in
+// `IdleTimeout` (e.g. `"900s"`) into whole seconds (`900`). It returns 0
+// when the field is unset, when the string does not end in "s", or when it
+// does not parse as a duration. Fractions of a second are truncated — the
+// watchdog only acts on whole seconds.
+func (e *sandboxEntry) idleTimeoutSecs() int64 {
+	raw := e.IdleTimeout
+	if raw == nil || !strings.HasSuffix(*raw, "s") {
+		return 0
+	}
+	parsed, err := time.ParseDuration(*raw)
+	if err != nil {
+		return 0
+	}
+	return int64(parsed.Seconds())
+}
+
+// autoStopLabel renders the auto-stop policy for one sandbox:
+//   - `no_autostop == true` → never auto-stops
+//   - `idle_timeout` set and positive → that many seconds
+//   - otherwise → no enforcement today; render as "never"
+//
+// If the manager later enforces an idle-grace default, render it here.
+func (e *sandboxEntry) autoStopLabel() string {
+	autostopDisabled := e.NoAutostop != nil && *e.NoAutostop
+	if !autostopDisabled {
+		if secs := e.idleTimeoutSecs(); secs > 0 {
+			return formatDurationSecs(secs)
+		}
+	}
+	return "never"
+}
+
+// formatDurationSecs prints `secs` as a compact duration (e.g. `90s`,
+// `15m`, `2h`, `1h30m`). Falls back to seconds if it's not a clean
+// minute/hour multiple.
+//
+// Only values that are a whole number of minutes use the `m`/`h` forms, so
+// no precision is ever hidden: 3601 renders as `3601s`, not `1h0m`.
+func formatDurationSecs(secs int64) string {
+	switch {
+	case secs < 60 || secs%60 != 0:
+		// Sub-minute values and anything with leftover seconds keep full
+		// precision. Without the modulo check, values >= 3600 such as 3601
+		// reached the hour/minute branch and lost their trailing seconds.
+		return fmt.Sprintf("%ds", secs)
+	case secs%3600 == 0:
+		// Whole hours, e.g. 7200 -> "2h".
+		return fmt.Sprintf("%dh", secs/3600)
+	case secs >= 3600:
+		// Whole minutes beyond at least one hour, e.g. 5400 -> "1h30m".
+		return fmt.Sprintf("%dh%dm", secs/3600, (secs%3600)/60)
+	default:
+		// Whole minutes below one hour, e.g. 900 -> "15m".
+		return fmt.Sprintf("%dm", secs/60)
+	}
+}
+
+// listResponse is the JSON body returned by GET /api/2.0/lakebox/sandboxes.
+// A non-empty NextPageToken means more pages follow.
+type listResponse struct {
+	Sandboxes     []sandboxEntry `json:"sandboxes"`
+	NextPageToken string         `json:"nextPageToken,omitempty"`
+}
+
+// listPageSize matches the manager-side default.
+const listPageSize = 100
+
+// updateBody is the PATCH body; the server takes the inner `Sandbox`
+// message directly with no `{"sandbox": ...}` wrapping. Pointer fields
+// encode proto3 optional semantics (see sandboxEntry): a nil pointer is
+// dropped from the payload by `omitempty`.
+type updateBody struct {
+	SandboxID   string  `json:"sandbox_id"`
+	Name        *string `json:"name,omitempty"`
+	IdleTimeout *string `json:"idle_timeout,omitempty"`
+	NoAutostop  *bool   `json:"no_autostop,omitempty"`
+}
+
+// registerKeyRequest is the JSON body for POST /api/2.0/lakebox/ssh-keys.
+// An empty Name is left out of the payload.
+type registerKeyRequest struct {
+	PublicKey string `json:"public_key"`
+	Name      string `json:"name,omitempty"`
+}
+
+// newLakeboxAPI returns a lakeboxAPI bound to the workspace client's config.
+// It returns a wrapped error when the underlying SDK client cannot be built.
+func newLakeboxAPI(w *databricks.WorkspaceClient) (*lakeboxAPI, error) {
+	c, err := client.New(w.Config)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create lakebox API client: %w", err)
+	}
+	return &lakeboxAPI{c: c}, nil
+}
+
+// headers attaches the workspace routing identifier so multi-workspace
+// gateways (e.g. SPOG hosts) can scope the credential. The
+// auth.WorkspaceIDNone sentinel ("none") is treated as unset so the
+// literal string never goes on the wire.
+func (a *lakeboxAPI) headers() map[string]string {
+	id := a.c.Config.WorkspaceID
+	if id != "" && id != auth.WorkspaceIDNone {
+		return map[string]string{orgIDHeader: id}
+	}
+	// No usable workspace ID: send no extra headers at all.
+	return nil
+}
+
+// create issues POST /api/2.0/lakebox/sandboxes and returns the decoded
+// response. When `name` is empty it is dropped from the payload, so the
+// server sees it as "unset" rather than "explicit empty string".
+func (a *lakeboxAPI) create(ctx context.Context, name string) (*createResponse, error) {
+	payload := createRequest{Sandbox: sandboxCreateBody{Name: name}}
+	created := new(createResponse)
+	if err := a.c.Do(ctx, http.MethodPost, lakeboxAPIPath, a.headers(), nil, payload, created); err != nil {
+		return nil, err
+	}
+	return created, nil
+}
+
+// list issues GET /api/2.0/lakebox/sandboxes repeatedly, feeding each
+// page's token into the next request, until the server stops sending
+// `next_page_token`. It returns every sandbox seen, or the first error.
+func (a *lakeboxAPI) list(ctx context.Context) ([]sandboxEntry, error) {
+	var collected []sandboxEntry
+	for token := ""; ; {
+		page, err := a.listPage(ctx, token)
+		if err != nil {
+			return nil, err
+		}
+		collected = append(collected, page.Sandboxes...)
+		token = page.NextPageToken
+		if token == "" {
+			return collected, nil
+		}
+	}
+}
+
+// listPage fetches a single page of sandboxes.
+//
+// `query` is passed in slot 6 (`request`), not slot 5 (`queryParams`). On
+// GET, the SDK's makeRequestBody serializes `request` into the URL query
+// string and sends an empty body. Routing through `queryParams` instead
+// makes it write a literal `null` body, which the lakebox manager rejects
+// with `INVALID_PARAMETER_VALUE: Request body must be a JSON object`. See
+// databricks-sdk-go/httpclient/request.go:makeRequestBody.
+func (a *lakeboxAPI) listPage(ctx context.Context, pageToken string) (*listResponse, error) {
+	query := make(map[string]any, 2)
+	query["page_size"] = listPageSize
+	if pageToken != "" {
+		query["page_token"] = pageToken
+	}
+	page := new(listResponse)
+	// The nil sits in the query-params argument on purpose; `query` rides in
+	// the request argument that follows it.
+	if err := a.c.Do(ctx, http.MethodGet, lakeboxAPIPath, a.headers(), nil, query, page); err != nil {
+		return nil, err
+	}
+	return page, nil
+}
+
+// get issues GET /api/2.0/lakebox/sandboxes/{id} and returns the decoded
+// sandbox.
+func (a *lakeboxAPI) get(ctx context.Context, id string) (*sandboxEntry, error) {
+	found := new(sandboxEntry)
+	if err := a.c.Do(ctx, http.MethodGet, sandboxPath(id), a.headers(), nil, nil, found); err != nil {
+		return nil, err
+	}
+	return found, nil
+}
+
+// update issues PATCH /api/2.0/lakebox/sandboxes/{id} carrying whichever of
+// `name` / `idle_timeout` / `no_autostop` the caller supplied. A nil
+// argument is left out of the wire payload, so the server preserves that
+// field's current value. A non-nil idleTimeoutSecs is sent as a Duration
+// string such as "900s". Returns the refreshed `sandboxEntry`.
+func (a *lakeboxAPI) update(ctx context.Context, id string, name *string, idleTimeoutSecs *int64, noAutostop *bool) (*sandboxEntry, error) {
+	patch := updateBody{SandboxID: id, Name: name, NoAutostop: noAutostop}
+	if idleTimeoutSecs != nil {
+		wire := fmt.Sprintf("%ds", *idleTimeoutSecs)
+		patch.IdleTimeout = &wire
+	}
+	updated := new(sandboxEntry)
+	if err := a.c.Do(ctx, http.MethodPatch, sandboxPath(id), a.headers(), nil, patch, updated); err != nil {
+		return nil, err
+	}
+	return updated, nil
+}
+
+// delete issues DELETE /api/2.0/lakebox/sandboxes/{id}; no response body is
+// decoded.
+func (a *lakeboxAPI) delete(ctx context.Context, id string) error {
+	target := sandboxPath(id)
+	return a.c.Do(ctx, http.MethodDelete, target, a.headers(), nil, nil, nil)
+}
+
+// stop calls POST /api/2.0/lakebox/sandboxes/{id}/stop and returns the
+// refreshed sandbox.
+func (a *lakeboxAPI) stop(ctx context.Context, id string) (*sandboxEntry, error) {
+	payload := map[string]string{"sandbox_id": id}
+	stopped := new(sandboxEntry)
+	if err := a.c.Do(ctx, http.MethodPost, sandboxPath(id)+"/stop", a.headers(), nil, payload, stopped); err != nil {
+		return nil, err
+	}
+	return stopped, nil
+}
+
+// start issues POST /api/2.0/lakebox/sandboxes/{id}/start, with the ID
+// repeated in the body as `sandbox_id`, and returns the refreshed sandbox.
+func (a *lakeboxAPI) start(ctx context.Context, id string) (*sandboxEntry, error) {
+	payload := map[string]string{"sandbox_id": id}
+	started := new(sandboxEntry)
+	if err := a.c.Do(ctx, http.MethodPost, sandboxPath(id)+"/start", a.headers(), nil, payload, started); err != nil {
+		return nil, err
+	}
+	return started, nil
+}
+
+// registerKey issues POST /api/2.0/lakebox/ssh-keys. When `name` is empty it
+// is left out of the payload, so the server records "unset" rather than an
+// explicit empty string.
+func (a *lakeboxAPI) registerKey(ctx context.Context, publicKey, name string) error {
+	req := registerKeyRequest{PublicKey: publicKey, Name: name}
+	return a.c.Do(ctx, http.MethodPost, lakeboxKeysAPIPath, a.headers(), nil, req, nil)
+}
+
+// sshKeyEntry is one element of the ssh-key list response.
+type sshKeyEntry struct {
+	KeyHash     string `json:"keyHash"`
+	Name        string `json:"name,omitempty"`
+	CreateTime  string `json:"createTime,omitempty"`
+	LastUseTime string `json:"lastUseTime,omitempty"`
+}
+
+// listKeysResponse is the JSON body returned by GET
+// /api/2.0/lakebox/ssh-keys. Per-user keys are hard-capped server-side, so
+// the whole set arrives in a single response and there is no pagination.
+type listKeysResponse struct {
+	SshKeys []sshKeyEntry `json:"sshKeys"`
+}
+
+// listKeys calls GET /api/2.0/lakebox/ssh-keys.
+func (a *lakeboxAPI) listKeys(ctx context.Context) ([]sshKeyEntry, error) {
+	listing := new(listKeysResponse)
+	if err := a.c.Do(ctx, http.MethodGet, lakeboxKeysAPIPath, a.headers(), nil, nil, listing); err != nil {
+		return nil, err
+	}
+	return listing.SshKeys, nil
+}
+
+// deleteKey issues DELETE /api/2.0/lakebox/ssh-keys/{key_hash}; the hash is
+// path-escaped before it is appended to the URL.
+func (a *lakeboxAPI) deleteKey(ctx context.Context, keyHash string) error {
+	target := lakeboxKeysAPIPath + "/" + url.PathEscape(keyHash)
+	return a.c.Do(ctx, http.MethodDelete, target, a.headers(), nil, nil, nil)
+}
diff --git a/cmd/lakebox/api_test.go b/cmd/lakebox/api_test.go
new file mode 100644
index 0000000000..9664f19f7d
--- /dev/null
+++ b/cmd/lakebox/api_test.go
@@ -0,0 +1,31 @@
+package lakebox
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// Names at or below the byte limit, including the empty name, are accepted.
+func TestValidateNameAcceptsAscii(t *testing.T) {
+	accepted := []string{
+		"",
+		"my-project",
+		strings.Repeat("a", 256), // boundary: exactly the limit
+	}
+	for _, name := range accepted {
+		require.NoError(t, validateName(name))
+	}
+}
+
+// One byte over the limit is rejected, and the message reports both the
+// observed size and the limit.
+func TestValidateNameRejectsOversize(t *testing.T) {
+	tooLong := strings.Repeat("a", 257)
+	err := validateName(tooLong)
+	require.Error(t, err)
+	msg := err.Error()
+	assert.Contains(t, msg, "257 bytes")
+	assert.Contains(t, msg, "256")
+}
+
+// The limit applies to bytes: a 4-byte emoji uses four units of the budget.
+func TestValidateNameCountsBytesNotRunes(t *testing.T) {
+	const panda = "🐼"
+	// 64 panda emoji = 64 × 4 bytes = 256 bytes — at the limit, OK.
+	require.NoError(t, validateName(strings.Repeat(panda, 64)))
+	// 65 = 260 bytes, rejected.
+	err := validateName(strings.Repeat(panda, 65))
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "260 bytes")
+}
diff --git a/cmd/lakebox/completion.go b/cmd/lakebox/completion.go
new file mode 100644
index 0000000000..29e9c07420
--- /dev/null
+++ b/cmd/lakebox/completion.go
@@ -0,0 +1,72 @@
+package lakebox
+
+import (
+	"github.com/databricks/cli/cmd/root"
+	"github.com/databricks/cli/libs/cmdctx"
+	"github.com/spf13/cobra"
+)
+
+// completeSandboxIDs supplies tab-completion candidates: every sandbox ID
+// from the local cache, plus its display name when that differs from the
+// ID. It is cache-only so an unrefreshed token never hangs the shell, and
+// any failure simply yields no suggestions. File completion is always
+// disabled.
+func completeSandboxIDs(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
+	noFiles := cobra.ShellCompDirectiveNoFileComp
+	if len(args) != 0 {
+		// A positional argument is already present; offer nothing more.
+		return nil, noFiles
+	}
+	profile := completionProfile(cmd, args)
+	if profile == "" {
+		return nil, noFiles
+	}
+	cached := getSandboxes(cmd.Context(), profile)
+	if len(cached) == 0 {
+		return nil, noFiles
+	}
+	candidates := make([]string, 0, 2*len(cached))
+	for _, sb := range cached {
+		candidates = append(candidates, sb.ID)
+		// Server defaults `name` to the ID, so skip empty or duplicate names.
+		if sb.Name == "" || sb.Name == sb.ID {
+			continue
+		}
+		candidates = append(candidates, sb.Name)
+	}
+	return candidates, noFiles
+}
+
+// completeSSHKeyHashes returns registered key hashes for `ssh-key delete`.
+// Hashes aren't cached locally, so this path calls the API.
+func completeSSHKeyHashes(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
+	noFiles := cobra.ShellCompDirectiveNoFileComp
+	if len(args) != 0 {
+		return nil, noFiles
+	}
+	if root.MustWorkspaceClient(cmd, args) != nil {
+		return nil, noFiles
+	}
+	// Read the context only after the workspace client has been set up.
+	ctx := cmd.Context()
+	api, err := newLakeboxAPI(cmdctx.WorkspaceClient(ctx))
+	if err != nil {
+		return nil, noFiles
+	}
+	keys, err := api.listKeys(ctx)
+	if err != nil {
+		return nil, noFiles
+	}
+	hashes := make([]string, len(keys))
+	for i := range keys {
+		hashes[i] = keys[i].KeyHash
+	}
+	return hashes, noFiles
+}
+
+// completionProfile reports the key the sandbox cache is stored under,
+// using the same resolution as the runtime commands: the configured
+// Profile when there is one, otherwise the Host. It returns "" when the
+// workspace client cannot be bootstrapped.
+func completionProfile(cmd *cobra.Command, args []string) string {
+	if root.MustWorkspaceClient(cmd, args) != nil {
+		return ""
+	}
+	w := cmdctx.WorkspaceClient(cmd.Context())
+	if profile := w.Config.Profile; profile != "" {
+		return profile
+	}
+	return w.Config.Host
+}
diff --git a/cmd/lakebox/config.go b/cmd/lakebox/config.go
new file mode 100644
index 0000000000..efe0d51220
--- /dev/null
+++ b/cmd/lakebox/config.go
@@ -0,0 +1,169 @@
+package lakebox
+
+import (
+	"errors"
+	"fmt"
+	"time"
+
+	"github.com/databricks/cli/cmd/root"
+	"github.com/databricks/cli/libs/cmdctx"
+	"github.com/databricks/cli/libs/cmdio"
+	"github.com/spf13/cobra"
+)
+
+// minIdleTimeoutSecs / maxIdleTimeoutSecs mirror the server-side bounds
+// on `idle_timeout`. Pre-flighting client-side gives a clearer error
+// than waiting for the server's INVALID_ARGUMENT.
+const ( + minIdleTimeoutSecs = 60 + maxIdleTimeoutSecs = 86_400 +) + +func newConfigCommand() *cobra.Command { + var idleTimeoutFlag string + var noAutostopFlag bool + var nameFlag string + + cmd := &cobra.Command{ + Use: "config ", + Short: "Configure a Lakebox's name and auto-stop policy", + Long: `Configure a Lakebox's name and auto-stop policy. + +Three knobs are independent — pass any combination: + + --name