diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 2cc9cf6cc6..353b20b35e 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -8,6 +8,7 @@ * Added `--limit` flag to all paginated list commands for client-side result capping ([#4984](https://github.com/databricks/cli/pull/4984)). * Accept `yes` in addition to `y` for confirmation prompts, and show `[y/N]` to indicate that no is the default. +* Cache `/.well-known/databricks-config` lookups under `~/.cache/databricks/<version>/host-metadata/` so repeat CLI invocations against the same host skip the ~700ms discovery round trip. * Deprecated `auth env`. The command is hidden from help listings and prints a deprecation warning to stderr; it will be removed in a future release. ### Bundles diff --git a/acceptance/auth/bundle_and_profile/output.txt b/acceptance/auth/bundle_and_profile/output.txt index 88deef1256..b2bab9342a 100644 --- a/acceptance/auth/bundle_and_profile/output.txt +++ b/acceptance/auth/bundle_and_profile/output.txt @@ -13,7 +13,7 @@ === Inside the bundle, profile flag not matching bundle host. Should use profile from the flag and not the bundle. >>> errcode [CLI] current-user me -p profile_name -Warn: Failed to resolve host metadata: (redacted). Falling back to user config. +Warn: [hostmetadata] failed to fetch host metadata for https://non.existing.subdomain.databricks.com, will skip for 1m0s Error: Get "https://non.existing.subdomain.databricks.com/api/2.0/preview/scim/v2/Me": (redacted) Exit code: 1 @@ -73,7 +73,7 @@ Validation OK! === Bundle commands load bundle configuration with -t and -p flag, validation not OK (profile host don't match bundle host) >>> errcode [CLI] bundle validate -t prod -p DEFAULT -Warn: Failed to resolve host metadata: (redacted). Falling back to user config. 
+Warn: [hostmetadata] failed to fetch host metadata for https://bar.com, will skip for 1m0s Error: cannot resolve bundle auth configuration: the host in the profile ([DATABRICKS_TARGET]) doesn’t match the host configured in the bundle (https://bar.com) Name: test-auth diff --git a/acceptance/auth/bundle_and_profile/test.toml b/acceptance/auth/bundle_and_profile/test.toml index 477e83a18d..92458e9d30 100644 --- a/acceptance/auth/bundle_and_profile/test.toml +++ b/acceptance/auth/bundle_and_profile/test.toml @@ -9,10 +9,6 @@ New='DATABRICKS_TARGET' Old='DATABRICKS_URL' New='DATABRICKS_TARGET' -[[Repls]] -Old='Warn: Failed to resolve host metadata: .*\. Falling back to user config\.' -New='Warn: Failed to resolve host metadata: (redacted). Falling back to user config.' - [[Repls]] Old='Get "https://non.existing.subdomain.databricks.com/api/2.0/preview/scim/v2/Me": .*' New='Get "https://non.existing.subdomain.databricks.com/api/2.0/preview/scim/v2/Me": (redacted)' diff --git a/acceptance/auth/credentials/unified-host/out.requests.txt b/acceptance/auth/credentials/unified-host/out.requests.txt index e94814526d..c154a54bff 100644 --- a/acceptance/auth/credentials/unified-host/out.requests.txt +++ b/acceptance/auth/credentials/unified-host/out.requests.txt @@ -22,15 +22,6 @@ "method": "GET", "path": "/api/2.0/preview/scim/v2/Me" } -{ - "headers": { - "User-Agent": [ - "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS]" - ] - }, - "method": "GET", - "path": "/.well-known/databricks-config" -} { "headers": { "Authorization": [ diff --git a/acceptance/auth/host-metadata-cache/out.test.toml b/acceptance/auth/host-metadata-cache/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/auth/host-metadata-cache/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/auth/host-metadata-cache/output.txt 
b/acceptance/auth/host-metadata-cache/output.txt new file mode 100644 index 0000000000..266c9fa93e --- /dev/null +++ b/acceptance/auth/host-metadata-cache/output.txt @@ -0,0 +1,32 @@ + +=== First invocation populates the cache +{ + "profiles": [ + { + "name":"cached", + "host":"[DATABRICKS_URL]", + "cloud":"aws", + "auth_type":"", + "valid":false + } + ] +} + +=== Second invocation should read from the cache +{ + "profiles": [ + { + "name":"cached", + "host":"[DATABRICKS_URL]", + "cloud":"aws", + "auth_type":"", + "valid":false + } + ] +} + +=== Only one /.well-known/databricks-config request recorded +{ + "method": "GET", + "path": "/.well-known/databricks-config" +} diff --git a/acceptance/auth/host-metadata-cache/script b/acceptance/auth/host-metadata-cache/script new file mode 100644 index 0000000000..f7a5f2fe0f --- /dev/null +++ b/acceptance/auth/host-metadata-cache/script @@ -0,0 +1,19 @@ +sethome "./home" +export DATABRICKS_CACHE_DIR="$TEST_TMP_DIR/cache" + +# Point a profile at the mock server so auth profiles triggers a host metadata +# fetch. Without a profile the command does nothing and the cache is never read. 
+cat > "./home/.databrickscfg" <>> [CLI] cache clear Cache cleared successfully from [TEST_TMP_DIR]/.cache === First call after a clear is expected to be a cache miss: [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) +[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing +[DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result +[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result diff --git a/acceptance/cache/simple/output.txt b/acceptance/cache/simple/output.txt index 093900b94b..2206ffdbc7 100644 --- a/acceptance/cache/simple/output.txt +++ b/acceptance/cache/simple/output.txt @@ -1,13 +1,17 @@ === First call in a session is expected to be a cache miss: [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) +[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing +[DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result +[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result === Second call in a session is expected to be a cache hit [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache hit +[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] +[DEBUG_TIMESTAMP] Debug: [Local Cache] cache hit === Bundle deploy should send telemetry values diff --git a/acceptance/cmd/auth/profiles/output.txt b/acceptance/cmd/auth/profiles/output.txt index 060da0eba5..207e2d5471 100644 --- a/acceptance/cmd/auth/profiles/output.txt +++ b/acceptance/cmd/auth/profiles/output.txt @@ -1,6 +1,6 @@ === Profiles 
with workspace_id (JSON output) -Warn: Failed to resolve host metadata: fetching host metadata from "https://test.cloud.databricks.com/.well-known/databricks-config": Get "https://test.cloud.databricks.com/.well-known/databricks-config": dial tcp: lookup test.cloud.databricks.com: no such host. Falling back to user config. +Warn: [hostmetadata] failed to fetch host metadata for https://test.cloud.databricks.com, will skip for 1m0s { "profiles": [ { diff --git a/acceptance/cmd/auth/profiles/test.toml b/acceptance/cmd/auth/profiles/test.toml index ad8ec1f872..36c0e7e237 100644 --- a/acceptance/cmd/auth/profiles/test.toml +++ b/acceptance/cmd/auth/profiles/test.toml @@ -1,9 +1,3 @@ Ignore = [ "home" ] - -# Normalize platform-specific DNS error messages in host metadata warnings. -# Linux includes resolver address (e.g. "on 127.0.0.53:53"), macOS does not. -[[Repls]] -Old = 'dial tcp: lookup (\S+)( on \S+)?: no such host' -New = 'dial tcp: lookup $1: no such host' diff --git a/acceptance/cmd/workspace/apps/out.requests.txt b/acceptance/cmd/workspace/apps/out.requests.txt index 9962050b50..ba4cf8bd6e 100644 --- a/acceptance/cmd/workspace/apps/out.requests.txt +++ b/acceptance/cmd/workspace/apps/out.requests.txt @@ -25,10 +25,6 @@ "method": "GET", "path": "/api/2.0/apps/test-name" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "PATCH", "path": "/api/2.0/apps/test-name", diff --git a/acceptance/telemetry/failure/output.txt b/acceptance/telemetry/failure/output.txt index af0c34a13e..2086a88444 100644 --- a/acceptance/telemetry/failure/output.txt +++ b/acceptance/telemetry/failure/output.txt @@ -1,12 +1,15 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 
200 OK < { < "oidc_endpoint": "[DATABRICKS_URL]/oidc", < "workspace_id": "[NUMID]" < } pid=PID sdk=true +HH:MM:SS Debug: [Local Cache] computed and stored result pid=PID HH:MM:SS Debug: Resolved workspace_id from host metadata: "[NUMID]" pid=PID sdk=true HH:MM:SS Debug: Resolved cloud from hostname: "AWS" pid=PID sdk=true HH:MM:SS Debug: Resolved discovery_url from host metadata: "[DATABRICKS_URL]/oidc/.well-known/oauth-authorization-server" pid=PID sdk=true diff --git a/acceptance/telemetry/partial-success/output.txt b/acceptance/telemetry/partial-success/output.txt index 113dc11b66..c641e6bc0d 100644 --- a/acceptance/telemetry/partial-success/output.txt +++ b/acceptance/telemetry/partial-success/output.txt @@ -1,12 +1,15 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { < "oidc_endpoint": "[DATABRICKS_URL]/oidc", < "workspace_id": "[NUMID]" < } pid=PID sdk=true +HH:MM:SS Debug: [Local Cache] computed and stored result pid=PID HH:MM:SS Debug: Resolved workspace_id from host metadata: "[NUMID]" pid=PID sdk=true HH:MM:SS Debug: Resolved cloud from hostname: "AWS" pid=PID sdk=true HH:MM:SS Debug: Resolved discovery_url from host metadata: "[DATABRICKS_URL]/oidc/.well-known/oauth-authorization-server" pid=PID sdk=true diff --git a/acceptance/telemetry/skipped/output.txt b/acceptance/telemetry/skipped/output.txt index e85ce380a4..9e784a8eb0 100644 --- a/acceptance/telemetry/skipped/output.txt +++ b/acceptance/telemetry/skipped/output.txt @@ -1,12 +1,15 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS 
Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { < "oidc_endpoint": "[DATABRICKS_URL]/oidc", < "workspace_id": "[NUMID]" < } pid=PID sdk=true +HH:MM:SS Debug: [Local Cache] computed and stored result pid=PID HH:MM:SS Debug: Resolved workspace_id from host metadata: "[NUMID]" pid=PID sdk=true HH:MM:SS Debug: Resolved cloud from hostname: "AWS" pid=PID sdk=true HH:MM:SS Debug: Resolved discovery_url from host metadata: "[DATABRICKS_URL]/oidc/.well-known/oauth-authorization-server" pid=PID sdk=true diff --git a/acceptance/telemetry/success/output.txt b/acceptance/telemetry/success/output.txt index f3b410f765..96f72c9727 100644 --- a/acceptance/telemetry/success/output.txt +++ b/acceptance/telemetry/success/output.txt @@ -1,12 +1,15 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { < "oidc_endpoint": "[DATABRICKS_URL]/oidc", < "workspace_id": "[NUMID]" < } pid=PID sdk=true +HH:MM:SS Debug: [Local Cache] computed and stored result pid=PID HH:MM:SS Debug: Resolved workspace_id from host metadata: "[NUMID]" pid=PID sdk=true HH:MM:SS Debug: Resolved cloud from hostname: "AWS" pid=PID sdk=true HH:MM:SS Debug: Resolved discovery_url from host metadata: "[DATABRICKS_URL]/oidc/.well-known/oauth-authorization-server" pid=PID sdk=true diff --git a/acceptance/telemetry/test.toml b/acceptance/telemetry/test.toml index 574ffd3ce1..32660c3b81 100644 --- a/acceptance/telemetry/test.toml +++ b/acceptance/telemetry/test.toml @@ -36,3 +36,11 @@ New = "pid=PID" [[Repls]] Old = "\\([0-9]+ more bytes\\)" New = "(N more bytes)" + +# Host metadata cache keys vary per-test because the mock server URL changes. 
+# Normalize them so the golden output is stable across runs. +# Order=1 so it runs before the parent's `\d{14,}` → `[NUMID]` replacement. +[[Repls]] +Old = '[a-f0-9]{64}' +New = "[SHA256_HASH]" +Order = 1 diff --git a/acceptance/telemetry/timeout/output.txt b/acceptance/telemetry/timeout/output.txt index a124cc72b6..21f79ef7be 100644 --- a/acceptance/telemetry/timeout/output.txt +++ b/acceptance/telemetry/timeout/output.txt @@ -1,12 +1,15 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { < "oidc_endpoint": "[DATABRICKS_URL]/oidc", < "workspace_id": "[NUMID]" < } pid=PID sdk=true +HH:MM:SS Debug: [Local Cache] computed and stored result pid=PID HH:MM:SS Debug: Resolved workspace_id from host metadata: "[NUMID]" pid=PID sdk=true HH:MM:SS Debug: Resolved cloud from hostname: "AWS" pid=PID sdk=true HH:MM:SS Debug: Resolved discovery_url from host metadata: "[DATABRICKS_URL]/oidc/.well-known/oauth-authorization-server" pid=PID sdk=true diff --git a/acceptance/workspace/lakeview/publish/out.requests.txt b/acceptance/workspace/lakeview/publish/out.requests.txt index 4adba9b64a..e4802babc6 100644 --- a/acceptance/workspace/lakeview/publish/out.requests.txt +++ b/acceptance/workspace/lakeview/publish/out.requests.txt @@ -9,10 +9,6 @@ "path": "/Users/[USERNAME]" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "POST", "path": "/api/2.0/lakeview/dashboards", @@ -22,10 +18,6 @@ "warehouse_id": "test-warehouse" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "POST", "path": "/api/2.0/lakeview/dashboards/[DASHBOARD_ID]/published", diff --git a/acceptance/workspace/repos/create_with_provider/out.requests.txt 
b/acceptance/workspace/repos/create_with_provider/out.requests.txt index 73219c0a27..430eb33fe1 100644 --- a/acceptance/workspace/repos/create_with_provider/out.requests.txt +++ b/acceptance/workspace/repos/create_with_provider/out.requests.txt @@ -11,18 +11,10 @@ "url": "https://github.com/databricks/databricks-empty-ide-project.git" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/repos/[NUMID]" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/workspace/get-status", @@ -34,10 +26,6 @@ "method": "GET", "path": "/api/2.0/repos/[NUMID]" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "DELETE", "path": "/api/2.0/repos/[NUMID]" diff --git a/acceptance/workspace/repos/delete_by_path/out.requests.txt b/acceptance/workspace/repos/delete_by_path/out.requests.txt index f6857935ae..9fa6916971 100644 --- a/acceptance/workspace/repos/delete_by_path/out.requests.txt +++ b/acceptance/workspace/repos/delete_by_path/out.requests.txt @@ -11,10 +11,6 @@ "url": "https://github.com/databricks/databricks-empty-ide-project.git" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/workspace/get-status", @@ -26,10 +22,6 @@ "method": "GET", "path": "/api/2.0/repos/[NUMID]" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/workspace/get-status", @@ -41,10 +33,6 @@ "method": "DELETE", "path": "/api/2.0/repos/[NUMID]" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/workspace/get-status", diff --git a/acceptance/workspace/repos/get_errors/out.requests.txt b/acceptance/workspace/repos/get_errors/out.requests.txt index 24de0f3dd0..2bfe07b131 100644 --- a/acceptance/workspace/repos/get_errors/out.requests.txt +++ b/acceptance/workspace/repos/get_errors/out.requests.txt @@ -9,10 
+9,6 @@ "path": "/Repos/me@databricks.com/doesnotexist" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "POST", "path": "/api/2.0/workspace/mkdirs", @@ -20,10 +16,6 @@ "path": "/not-a-repo" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/workspace/get-status", diff --git a/acceptance/workspace/repos/update/out.requests.txt b/acceptance/workspace/repos/update/out.requests.txt index ca982e372d..fa5a518dad 100644 --- a/acceptance/workspace/repos/update/out.requests.txt +++ b/acceptance/workspace/repos/update/out.requests.txt @@ -11,10 +11,6 @@ "url": "https://github.com/databricks/databricks-empty-ide-project.git" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "PATCH", "path": "/api/2.0/repos/[NUMID]", @@ -22,18 +18,10 @@ "branch": "update-by-id" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/repos/[NUMID]" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/workspace/get-status", @@ -48,10 +36,6 @@ "branch": "update-by-path" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/repos/[NUMID]" diff --git a/internal/testutil/env.go b/internal/testutil/env.go index 2033d4fc83..abf577ece0 100644 --- a/internal/testutil/env.go +++ b/internal/testutil/env.go @@ -27,6 +27,9 @@ func CleanupEnvironment(t TestingT) { if runtime.GOOS == "windows" { t.Setenv("USERPROFILE", pwd) } + // Isolate the CLI cache (host metadata, user cache) so tests don't leak + // cache files into HOME (which CleanupEnvironment rebinds to pwd). + t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) } // NullEnvironment sets up an empty environment with absolutely no environment variables set. 
diff --git a/libs/cache/cache.go b/libs/cache/cache.go index 513f7ebd00..3fca87ed94 100644 --- a/libs/cache/cache.go +++ b/libs/cache/cache.go @@ -13,6 +13,15 @@ type cacheImpl interface { // The compute function must return JSON-encoded data as []byte. // The returned []byte is also expected to be JSON-encoded. getOrComputeJSON(ctx context.Context, fingerprint any, compute func(ctx context.Context) ([]byte, error)) ([]byte, error) + + // getJSON returns cached JSON bytes for fingerprint, or (nil, false) on + // miss or when caching is disabled. Never computes, never writes. + getJSON(ctx context.Context, fingerprint any) ([]byte, bool) + + // putJSON writes data to the cache under fingerprint, overwriting any + // existing entry. When caching is disabled it is a no-op. Failures are + // silent (logged at debug). + putJSON(ctx context.Context, fingerprint any, data []byte) } // Cache provides a concrete cache that works with any type through the generic GetOrCompute function. @@ -21,6 +30,25 @@ type Cache struct { impl cacheImpl } +// Get returns the cached value for the given fingerprint, or (zero, false) on +// miss. Unlike GetOrCompute it never invokes compute and never writes. Use +// this when the caller wants a read-only probe and will handle a miss +// explicitly, without the cache-level "error while computing" log that an +// erroring compute callback would emit. +func Get[T any](ctx context.Context, c *Cache, fingerprint any) (T, bool) { + var zero T + data, ok := c.impl.getJSON(ctx, fingerprint) + if !ok { + return zero, false + } + var result T + if err := json.Unmarshal(data, &result); err != nil { + log.Debugf(ctx, "[Local Cache] failed to unmarshal cached data: %v", err) + return zero, false + } + return result, true +} + // GetOrCompute retrieves cached content for the given fingerprint, or computes it using the provided function. // If the content is found in cache, it is returned directly. 
// If not found, the compute function is called, its result is cached, and then returned. @@ -56,3 +84,17 @@ func GetOrCompute[T any](ctx context.Context, c *Cache, fingerprint any, compute return result, nil } + +// Put serializes value to JSON and writes it to the cache under fingerprint, +// overwriting any existing entry. Failures are silent; when caching is +// disabled it is a no-op. Use this when the caller wants an unconditional +// write (e.g. recording a negative sentinel) rather than the read-then-write +// semantics of GetOrCompute. +func Put[T any](ctx context.Context, c *Cache, fingerprint any, value T) { + data, err := json.Marshal(value) + if err != nil { + log.Debugf(ctx, "[Local Cache] failed to marshal value for cache write: %v", err) + return + } + c.impl.putJSON(ctx, fingerprint, data) +} diff --git a/libs/cache/file_cache.go b/libs/cache/file_cache.go index 44e9c5aba5..3d03fa3d16 100644 --- a/libs/cache/file_cache.go +++ b/libs/cache/file_cache.go @@ -2,7 +2,9 @@ package cache import ( "context" + "errors" "fmt" + "io/fs" "os" "path/filepath" "sync" @@ -143,6 +145,34 @@ func NewCache(ctx context.Context, component string, expiry time.Duration, metri return &Cache{impl: fc} } +func (fc *fileCache) putJSON(ctx context.Context, fingerprint any, data []byte) { + if !fc.cacheEnabled { + return + } + cacheKey, err := fingerprintToHash(fingerprint) + if err != nil { + log.Debugf(ctx, "[Local Cache] failed to generate cache key for put: %v", err) + return + } + fc.mu.Lock() + defer fc.mu.Unlock() + fc.writeToCacheJSON(ctx, fc.getCachePath(cacheKey), data) +} + +func (fc *fileCache) getJSON(ctx context.Context, fingerprint any) ([]byte, bool) { + if !fc.cacheEnabled { + return nil, false + } + cacheKey, err := fingerprintToHash(fingerprint) + if err != nil { + log.Debugf(ctx, "[Local Cache] failed to generate cache key: %v", err) + return nil, false + } + fc.mu.Lock() + defer fc.mu.Unlock() + return fc.readFromCacheJSON(ctx, fc.getCachePath(cacheKey)) 
+} + func (fc *fileCache) addTelemetryMetric(key string) { if fc.metrics != nil { fc.metrics.SetBoolValue(key, true) @@ -217,7 +247,13 @@ func (fc *fileCache) readFromCacheJSON(ctx context.Context, cachePath string) ([ // Check file modification time for expiry info, err := os.Stat(cachePath) if err != nil { - log.Debugf(ctx, "[Local Cache] failed to stat cache file: %v", err) + // ErrNotExist is the common miss case; logging it adds noise and + // diverges across OSes (Unix: "no such file or directory"; + // Windows: "The system cannot find the file specified."). The + // follow-up "cache miss, computing" line already captures it. + if !errors.Is(err, fs.ErrNotExist) { + log.Debugf(ctx, "[Local Cache] failed to stat cache file: %v", err) + } return nil, false } diff --git a/libs/cache/file_cache_test.go b/libs/cache/file_cache_test.go index 3a8470c59d..8a47f41c20 100644 --- a/libs/cache/file_cache_test.go +++ b/libs/cache/file_cache_test.go @@ -122,6 +122,45 @@ func TestFileCacheGetOrCompute(t *testing.T) { assert.Equal(t, int32(1), atomic.LoadInt32(&computeCalls)) } +func TestFileCachePut(t *testing.T) { + ctx := t.Context() + cacheDir := t.TempDir() + ctx = env.Set(ctx, "DATABRICKS_CACHE_ENABLED", "true") + ctx = env.Set(ctx, "DATABRICKS_CACHE_DIR", cacheDir) + + cache := NewCache(ctx, "test-component", 60*time.Minute, nil) + fingerprint := struct { + Key string `json:"key"` + }{Key: "put-test"} + + Put(ctx, cache, fingerprint, "first") + got, ok := Get[string](ctx, cache, fingerprint) + require.True(t, ok) + assert.Equal(t, "first", got) + + // Put overwrites, unlike GetOrCompute which preserves existing entries. 
+ Put(ctx, cache, fingerprint, "second") + got, ok = Get[string](ctx, cache, fingerprint) + require.True(t, ok) + assert.Equal(t, "second", got) +} + +func TestFileCachePutDisabled(t *testing.T) { + ctx := t.Context() + cacheDir := t.TempDir() + ctx = env.Set(ctx, "DATABRICKS_CACHE_ENABLED", "false") + ctx = env.Set(ctx, "DATABRICKS_CACHE_DIR", cacheDir) + + cache := NewCache(ctx, "test-component", 60*time.Minute, nil) + fingerprint := struct { + Key string `json:"key"` + }{Key: "put-disabled"} + + Put(ctx, cache, fingerprint, "value") + _, ok := Get[string](ctx, cache, fingerprint) + assert.False(t, ok, "disabled cache must not persist Put writes") +} + func TestFileCacheGetOrComputeError(t *testing.T) { ctx := t.Context() tempDir := t.TempDir() diff --git a/libs/cache/noop_file_cache.go b/libs/cache/noop_file_cache.go index 4b71be43fc..3d79e8d887 100644 --- a/libs/cache/noop_file_cache.go +++ b/libs/cache/noop_file_cache.go @@ -7,3 +7,10 @@ type noopFileCache struct{} func (c *noopFileCache) getOrComputeJSON(ctx context.Context, fingerprint any, compute func(ctx context.Context) ([]byte, error)) ([]byte, error) { return compute(ctx) } + +func (c *noopFileCache) getJSON(ctx context.Context, fingerprint any) ([]byte, bool) { + return nil, false +} + +func (c *noopFileCache) putJSON(ctx context.Context, fingerprint any, data []byte) { +} diff --git a/libs/hostmetadata/resolver.go b/libs/hostmetadata/resolver.go new file mode 100644 index 0000000000..595e37bd22 --- /dev/null +++ b/libs/hostmetadata/resolver.go @@ -0,0 +1,93 @@ +// Package hostmetadata provides a cached implementation of the SDK's +// HostMetadataResolver, backed by the CLI's shared file cache. +// +// Importing this package (typically via a blank import from main) installs +// [config.DefaultHostMetadataResolverFactory] so every *config.Config the +// CLI constructs automatically gets the cached resolver on first EnsureResolved. 
+package hostmetadata + +import ( + "context" + "errors" + "time" + + "github.com/databricks/cli/libs/cache" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go/config" +) + +const ( + positiveCacheComponent = "host-metadata" + negativeCacheComponent = "host-metadata-negative" + positiveCacheTTL = 1 * time.Hour + negativeCacheTTL = 60 * time.Second +) + +// errNegativeHit is returned from the positive-cache compute callback when the +// negative cache already has a sentinel for the host. It signals the outer +// resolver to return (nil, nil) without running fetch or writing to positive. +var errNegativeHit = errors.New("negative cache hit") + +type hostFingerprint struct { + Host string `json:"host"` +} + +// negativeSentinel marks a host whose last fetch failed. Only presence matters; +// the original error text is deliberately not persisted to disk. +type negativeSentinel struct { + Error bool `json:"error"` +} + +func init() { + config.DefaultHostMetadataResolverFactory = func(cfg *config.Config) config.HostMetadataResolver { + return NewResolver(cfg.DefaultHostMetadataResolver()) + } +} + +// NewResolver returns a HostMetadataResolver backed by a positive and negative +// file cache. On positive hit it returns the cached metadata; on miss it +// probes the negative cache, then falls through to fetch and records failures +// so subsequent calls within negativeCacheTTL skip the network. The fetch +// function is invoked on miss, typically cfg.DefaultHostMetadataResolver(). +func NewResolver(fetch config.HostMetadataResolver) config.HostMetadataResolver { + // The SDK factory signature (func(cfg *config.Config) HostMetadataResolver) + // gives us no caller ctx at construction, so Background is the only option + // here. cache.NewCache uses ctx only for a one-time env lookup and + // cleanup-walk logging; per-call ctx still flows through the returned + // resolver below. 
+ ctx := context.Background() //nolint:gocritic // forced by SDK factory signature; see comment above. + positive := cache.NewCache(ctx, positiveCacheComponent, positiveCacheTTL, nil) + negative := cache.NewCache(ctx, negativeCacheComponent, negativeCacheTTL, nil) + + return func(ctx context.Context, host string) (*config.HostMetadata, error) { + fp := hostFingerprint{Host: host} + + // Positive cache wraps the whole miss path so that the happy path (hit) + // is a single disk read — no synthetic probe, no negative-cache traffic. + meta, err := cache.GetOrCompute[*config.HostMetadata](ctx, positive, fp, func(ctx context.Context) (*config.HostMetadata, error) { + if sentinel, ok := cache.Get[*negativeSentinel](ctx, negative, fp); ok && sentinel != nil && sentinel.Error { + log.Debugf(ctx, "[hostmetadata] negative cache hit for %s", host) + return nil, errNegativeHit + } + return fetch(ctx, host) + }) + if err == nil { + return meta, nil + } + if errors.Is(err, errNegativeHit) { + return nil, nil + } + // Transient errors (cancellation, deadline) say nothing about the + // host's long-term availability — don't write a negative sentinel. + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return nil, nil + } + // The raw error is env-dependent (DNS vs TLS vs HTTP) and would make + // acceptance goldens brittle, so keep it at Debug; the Warn text is + // stable (host only) for user visibility. 
+ log.Warnf(ctx, "[hostmetadata] failed to fetch host metadata for %s, will skip for %s", host, negativeCacheTTL) + log.Debugf(ctx, "[hostmetadata] fetch error for %s: %v", host, err) + cache.Put(ctx, negative, fp, &negativeSentinel{Error: true}) + return nil, nil + } +} diff --git a/libs/hostmetadata/resolver_test.go b/libs/hostmetadata/resolver_test.go new file mode 100644 index 0000000000..965f38cd7e --- /dev/null +++ b/libs/hostmetadata/resolver_test.go @@ -0,0 +1,108 @@ +package hostmetadata_test + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + + "github.com/databricks/cli/libs/hostmetadata" + "github.com/databricks/databricks-sdk-go/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewResolver_CacheHit_SkipsFetch(t *testing.T) { + t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) + + var calls atomic.Int32 + fetch := func(ctx context.Context, host string) (*config.HostMetadata, error) { + calls.Add(1) + return &config.HostMetadata{AccountID: "acct-1"}, nil + } + r := hostmetadata.NewResolver(fetch) + + m1, err := r(t.Context(), "https://example") + require.NoError(t, err) + assert.Equal(t, "acct-1", m1.AccountID) + + m2, err := r(t.Context(), "https://example") + require.NoError(t, err) + assert.Equal(t, "acct-1", m2.AccountID) + + assert.Equal(t, int32(1), calls.Load(), "second call must be served from cache") +} + +func TestNewResolver_FetchError_CachesNegative(t *testing.T) { + t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) + + var calls atomic.Int32 + fetch := func(ctx context.Context, host string) (*config.HostMetadata, error) { + calls.Add(1) + return nil, errors.New("boom") + } + r := hostmetadata.NewResolver(fetch) + + m, err := r(t.Context(), "https://example") + require.NoError(t, err, "fetch errors must be swallowed (SDK sees (nil, nil) = no metadata)") + assert.Nil(t, m) + + first := calls.Load() + require.GreaterOrEqual(t, first, int32(1)) + + _, err 
= r(t.Context(), "https://example") + require.NoError(t, err) + assert.Equal(t, first, calls.Load(), "negative cache must skip the fetch") +} + +func TestNewResolver_CancellationNotCached(t *testing.T) { + t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) + + var calls atomic.Int32 + fetch := func(ctx context.Context, host string) (*config.HostMetadata, error) { + calls.Add(1) + return nil, context.Canceled + } + r := hostmetadata.NewResolver(fetch) + + m1, err := r(t.Context(), "https://example") + require.NoError(t, err) + assert.Nil(t, m1) + + m2, err := r(t.Context(), "https://example") + require.NoError(t, err) + assert.Nil(t, m2) + + assert.Equal(t, int32(2), calls.Load(), "cancellation must not be negatively cached") +} + +// TestFactory_EndToEnd_CacheHitSkipsSDKFetch is an integration sanity check +// that importing hostmetadata installs a factory which back-fills every +// *config.Config with a cached resolver. Two independent configs sharing +// DATABRICKS_CACHE_DIR must hit the well-known endpoint once, not twice. 
+func TestFactory_EndToEnd_CacheHitSkipsSDKFetch(t *testing.T) { + t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) + + var hits atomic.Int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/.well-known/databricks-config" { + hits.Add(1) + _, _ = w.Write([]byte(`{"oidc_endpoint":"https://example.com/oidc","account_id":"acct-1","cloud":"aws"}`)) + return + } + w.WriteHeader(http.StatusNotFound) + })) + t.Cleanup(server.Close) + + cfg1 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} + require.NoError(t, cfg1.EnsureResolved()) + require.Equal(t, int32(1), hits.Load()) + + cfg2 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} + require.NoError(t, cfg2.EnsureResolved()) + + assert.Equal(t, "acct-1", cfg2.AccountID) + assert.Equal(t, int32(1), hits.Load(), "second EnsureResolved must not hit the server") +} diff --git a/libs/testdiff/replacement.go b/libs/testdiff/replacement.go index 188a623cc8..e293e74ede 100644 --- a/libs/testdiff/replacement.go +++ b/libs/testdiff/replacement.go @@ -25,8 +25,10 @@ var ( uuidRegex = regexp.MustCompile(`[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}`) numIdRegex = regexp.MustCompile(`[0-9]{3,}`) privatePathRegex = regexp.MustCompile(`(/tmp|/private)(/.*)/([a-zA-Z0-9]+)`) - // Version could v0.0.0-dev+21e1aacf518a or just v0.0.0-dev (the latter is currently the case on Windows) - devVersionRegex = regexp.MustCompile(`0\.0\.0-dev(\+[a-f0-9]{10,16})?`) + // Version could be v0.0.0-dev+21e1aacf518a, v0.0.0-dev-21e1aacf518a (the + // filesystem-sanitized form used in cache paths), or just v0.0.0-dev + // (currently the case on Windows). 
+ devVersionRegex = regexp.MustCompile(`0\.0\.0-dev([-+][a-f0-9]{10,16})?`) // Matches databricks-sdk-go/0.90.0 sdkVersionRegex = regexp.MustCompile(`databricks-sdk-go/[0-9]+\.[0-9]+\.[0-9]+`) ) diff --git a/main.go b/main.go index c568e6adbd..e81dde6946 100644 --- a/main.go +++ b/main.go @@ -6,6 +6,10 @@ import ( "github.com/databricks/cli/cmd" "github.com/databricks/cli/cmd/root" + + // Registers a disk-cached HostMetadataResolver factory on the SDK so every + // *config.Config the CLI constructs reuses the cached /.well-known lookup. + _ "github.com/databricks/cli/libs/hostmetadata" ) func main() {