From eb8909f743516a8c0e042d093577766d2d9fd138 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 11:04:29 +0200 Subject: [PATCH 01/25] Add hostmetadata package with caching resolver skeleton --- libs/hostmetadata/resolver.go | 78 ++++++++++++++++++++++++++++++ libs/hostmetadata/resolver_test.go | 21 ++++++++ 2 files changed, 99 insertions(+) create mode 100644 libs/hostmetadata/resolver.go create mode 100644 libs/hostmetadata/resolver_test.go diff --git a/libs/hostmetadata/resolver.go b/libs/hostmetadata/resolver.go new file mode 100644 index 0000000000..e67eda993c --- /dev/null +++ b/libs/hostmetadata/resolver.go @@ -0,0 +1,78 @@ +// Package hostmetadata provides a cached implementation of the SDK's +// HostMetadataResolver, backed by the CLI's shared file cache. +package hostmetadata + +import ( + "context" + "errors" + "time" + + "github.com/databricks/cli/libs/cache" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go/config" +) + +const ( + positiveCacheComponent = "host-metadata" + negativeCacheComponent = "host-metadata-negative" + positiveCacheTTL = 1 * time.Hour + negativeCacheTTL = 60 * time.Second +) + +// errNotCached forces a cache miss in the negative-cache probe without storing +// anything, since GetOrCompute only writes on success. +var errNotCached = errors.New("not cached") + +// hostFingerprint is the cache key for a given host. +type hostFingerprint struct { + Host string +} + +// negativeSentinel records a failed host-metadata fetch in the negative cache. +type negativeSentinel struct { + Error bool + Message string +} + +// Attach creates caching wrappers for positive and negative host-metadata +// results and installs them on cfg. 
+func Attach(ctx context.Context, cfg *config.Config) error { + positive := cache.NewCache(ctx, positiveCacheComponent, positiveCacheTTL, nil) + negative := cache.NewCache(ctx, negativeCacheComponent, negativeCacheTTL, nil) + cfg.HostMetadataResolver = newResolver(cfg, positive, negative) + return nil +} + +// newResolver returns a HostMetadataResolver that consults the negative cache +// before hitting the positive cache, and records failed fetches so subsequent +// calls within negativeCacheTTL skip the network entirely. +func newResolver(cfg *config.Config, positive, negative *cache.Cache) config.HostMetadataResolver { + return func(ctx context.Context, host string) (*config.HostMetadata, error) { + fp := hostFingerprint{Host: host} + + // Check negative cache first. errNotCached makes GetOrCompute skip the + // write, so this is a read-only probe. + sentinel, err := cache.GetOrCompute[*negativeSentinel](ctx, negative, fp, func(ctx context.Context) (*negativeSentinel, error) { + return nil, errNotCached + }) + if err == nil && sentinel != nil && sentinel.Error { + log.Debugf(ctx, "[hostmetadata] negative cache hit for %s: %s", host, sentinel.Message) + return nil, nil + } + + // Positive cache: on miss, delegate to the SDK's default HTTP resolver. + meta, err := cache.GetOrCompute[*config.HostMetadata](ctx, positive, fp, func(ctx context.Context) (*config.HostMetadata, error) { + return cfg.DefaultHostMetadataResolver()(ctx, host) + }) + if err != nil { + log.Debugf(ctx, "[hostmetadata] fetch failed for %s, recording negative: %v", host, err) + // Best-effort write to negative cache; ignore errors. 
+ _, _ = cache.GetOrCompute[*negativeSentinel](ctx, negative, fp, func(ctx context.Context) (*negativeSentinel, error) { + return &negativeSentinel{Error: true, Message: err.Error()}, nil + }) + return nil, nil + } + + return meta, nil + } +} diff --git a/libs/hostmetadata/resolver_test.go b/libs/hostmetadata/resolver_test.go new file mode 100644 index 0000000000..50d43e65bf --- /dev/null +++ b/libs/hostmetadata/resolver_test.go @@ -0,0 +1,21 @@ +package hostmetadata_test + +import ( + "testing" + + "github.com/databricks/cli/libs/hostmetadata" + "github.com/databricks/databricks-sdk-go/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAttach_SetsResolverOnConfig(t *testing.T) { + ctx := t.Context() + cfg := &config.Config{Host: "https://example.cloud.databricks.com"} + assert.Nil(t, cfg.HostMetadataResolver) + + err := hostmetadata.Attach(ctx, cfg) + require.NoError(t, err) + + assert.NotNil(t, cfg.HostMetadataResolver) +} From fa9b17465082588a8608b618993ae636e37fc986 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 11:06:57 +0200 Subject: [PATCH 02/25] Fix Task 1 spec deviations --- libs/hostmetadata/resolver.go | 9 ++++----- libs/hostmetadata/resolver_test.go | 5 ++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/libs/hostmetadata/resolver.go b/libs/hostmetadata/resolver.go index e67eda993c..a2b8661334 100644 --- a/libs/hostmetadata/resolver.go +++ b/libs/hostmetadata/resolver.go @@ -25,22 +25,21 @@ var errNotCached = errors.New("not cached") // hostFingerprint is the cache key for a given host. type hostFingerprint struct { - Host string + Host string `json:"host"` } // negativeSentinel records a failed host-metadata fetch in the negative cache. type negativeSentinel struct { - Error bool - Message string + Error bool `json:"error"` + Message string `json:"message"` } // Attach creates caching wrappers for positive and negative host-metadata // results and installs them on cfg. 
-func Attach(ctx context.Context, cfg *config.Config) error { +func Attach(ctx context.Context, cfg *config.Config) { positive := cache.NewCache(ctx, positiveCacheComponent, positiveCacheTTL, nil) negative := cache.NewCache(ctx, negativeCacheComponent, negativeCacheTTL, nil) cfg.HostMetadataResolver = newResolver(cfg, positive, negative) - return nil } // newResolver returns a HostMetadataResolver that consults the negative cache diff --git a/libs/hostmetadata/resolver_test.go b/libs/hostmetadata/resolver_test.go index 50d43e65bf..f5cad8b703 100644 --- a/libs/hostmetadata/resolver_test.go +++ b/libs/hostmetadata/resolver_test.go @@ -12,10 +12,9 @@ import ( func TestAttach_SetsResolverOnConfig(t *testing.T) { ctx := t.Context() cfg := &config.Config{Host: "https://example.cloud.databricks.com"} - assert.Nil(t, cfg.HostMetadataResolver) + require.Nil(t, cfg.HostMetadataResolver) - err := hostmetadata.Attach(ctx, cfg) - require.NoError(t, err) + hostmetadata.Attach(ctx, cfg) assert.NotNil(t, cfg.HostMetadataResolver) } From 98569734129730469cceb19029b099664ba07741 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 11:11:56 +0200 Subject: [PATCH 03/25] Isolate hostmetadata test from user cache directory --- libs/hostmetadata/resolver_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/hostmetadata/resolver_test.go b/libs/hostmetadata/resolver_test.go index f5cad8b703..c20776f4d3 100644 --- a/libs/hostmetadata/resolver_test.go +++ b/libs/hostmetadata/resolver_test.go @@ -10,6 +10,7 @@ import ( ) func TestAttach_SetsResolverOnConfig(t *testing.T) { + t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) ctx := t.Context() cfg := &config.Config{Host: "https://example.cloud.databricks.com"} require.Nil(t, cfg.HostMetadataResolver) From 2678fe3f4d9e4c70e2bc31183d46e12def0a3ad7 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 11:14:24 +0200 Subject: [PATCH 04/25] Add hostmetadata cache hit/miss tests --- libs/hostmetadata/resolver_test.go | 54 
++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/libs/hostmetadata/resolver_test.go b/libs/hostmetadata/resolver_test.go index c20776f4d3..41d36c9953 100644 --- a/libs/hostmetadata/resolver_test.go +++ b/libs/hostmetadata/resolver_test.go @@ -1,6 +1,9 @@ package hostmetadata_test import ( + "net/http" + "net/http/httptest" + "sync/atomic" "testing" "github.com/databricks/cli/libs/hostmetadata" @@ -19,3 +22,54 @@ func TestAttach_SetsResolverOnConfig(t *testing.T) { assert.NotNil(t, cfg.HostMetadataResolver) } + +func TestCachingResolver_CacheMiss_DelegatesToSDKFetch(t *testing.T) { + t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) + ctx := t.Context() + + var hits atomic.Int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/.well-known/databricks-config" { + hits.Add(1) + _, _ = w.Write([]byte(`{"oidc_endpoint":"https://example.com/oidc","account_id":"acct-1","cloud":"aws"}`)) + return + } + w.WriteHeader(http.StatusNotFound) + })) + t.Cleanup(server.Close) + + cfg := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} + hostmetadata.Attach(ctx, cfg) + require.NoError(t, cfg.EnsureResolved()) + + assert.Equal(t, "acct-1", cfg.AccountID) + assert.Equal(t, int32(1), hits.Load()) +} + +func TestCachingResolver_CacheHit_SkipsSDKFetch(t *testing.T) { + t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) + ctx := t.Context() + + var hits atomic.Int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/.well-known/databricks-config" { + hits.Add(1) + _, _ = w.Write([]byte(`{"oidc_endpoint":"https://example.com/oidc","account_id":"acct-1","cloud":"aws"}`)) + return + } + w.WriteHeader(http.StatusNotFound) + })) + t.Cleanup(server.Close) + + cfg1 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} + hostmetadata.Attach(ctx, cfg1) + require.NoError(t, 
cfg1.EnsureResolved()) + require.Equal(t, int32(1), hits.Load()) + + cfg2 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} + hostmetadata.Attach(ctx, cfg2) + require.NoError(t, cfg2.EnsureResolved()) + + assert.Equal(t, "acct-1", cfg2.AccountID) + assert.Equal(t, int32(1), hits.Load(), "second EnsureResolved must not hit the server") +} From b71f298baaadd1c5117919ac90d030ff52aadb1b Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 11:20:03 +0200 Subject: [PATCH 05/25] Add hostmetadata negative-cache and host-isolation tests --- libs/hostmetadata/resolver_test.go | 59 ++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/libs/hostmetadata/resolver_test.go b/libs/hostmetadata/resolver_test.go index 41d36c9953..170f11ca91 100644 --- a/libs/hostmetadata/resolver_test.go +++ b/libs/hostmetadata/resolver_test.go @@ -73,3 +73,62 @@ func TestCachingResolver_CacheHit_SkipsSDKFetch(t *testing.T) { assert.Equal(t, "acct-1", cfg2.AccountID) assert.Equal(t, int32(1), hits.Load(), "second EnsureResolved must not hit the server") } + +func TestCachingResolver_FetchError_CachesNegative(t *testing.T) { + t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) + ctx := t.Context() + + var hits atomic.Int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/.well-known/databricks-config" { + hits.Add(1) + w.WriteHeader(http.StatusInternalServerError) + return + } + w.WriteHeader(http.StatusNotFound) + })) + t.Cleanup(server.Close) + + cfg1 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} + hostmetadata.Attach(ctx, cfg1) + require.NoError(t, cfg1.EnsureResolved(), "fetch error must be non-fatal") + + firstHits := hits.Load() + require.GreaterOrEqual(t, firstHits, int32(1), "first resolve must have hit the server") + + cfg2 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} + 
hostmetadata.Attach(ctx, cfg2) + require.NoError(t, cfg2.EnsureResolved(), "fetch error must stay non-fatal with negative cache hit") + + assert.Equal(t, firstHits, hits.Load(), "negative cache must prevent subsequent fetches") +} + +func TestCachingResolver_DifferentHosts_SeparateEntries(t *testing.T) { + t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) + ctx := t.Context() + + respond := func(accountID string) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/.well-known/databricks-config" { + _, _ = w.Write([]byte(`{"oidc_endpoint":"https://example.com/oidc","account_id":"` + accountID + `","cloud":"aws"}`)) + return + } + w.WriteHeader(http.StatusNotFound) + } + } + serverA := httptest.NewServer(respond("acct-A")) + serverB := httptest.NewServer(respond("acct-B")) + t.Cleanup(serverA.Close) + t.Cleanup(serverB.Close) + + cfgA := &config.Config{Host: serverA.URL, Token: "x", Credentials: config.PatCredentials{}} + cfgB := &config.Config{Host: serverB.URL, Token: "x", Credentials: config.PatCredentials{}} + hostmetadata.Attach(ctx, cfgA) + hostmetadata.Attach(ctx, cfgB) + + require.NoError(t, cfgA.EnsureResolved()) + require.NoError(t, cfgB.EnsureResolved()) + + assert.Equal(t, "acct-A", cfgA.AccountID) + assert.Equal(t, "acct-B", cfgB.AccountID) +} From a8fc7d1f492fe43eef0f53750b5ae36198bf341c Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 11:24:14 +0200 Subject: [PATCH 06/25] Attach hostmetadata cache to root workspace/account clients --- cmd/root/auth.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmd/root/auth.go b/cmd/root/auth.go index 4a4bd9ab87..cad0cde16b 100644 --- a/cmd/root/auth.go +++ b/cmd/root/auth.go @@ -12,6 +12,7 @@ import ( "github.com/databricks/cli/libs/databrickscfg" "github.com/databricks/cli/libs/databrickscfg/profile" envlib "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/cli/libs/log" 
"github.com/databricks/cli/libs/logdiag" "github.com/databricks/databricks-sdk-go" @@ -156,6 +157,8 @@ func MustAccountClient(cmd *cobra.Command, args []string) error { ctx = cmdctx.SetConfigUsed(ctx, cfg) cmd.SetContext(ctx) + hostmetadata.Attach(ctx, cfg) + profiler := profile.GetProfiler(ctx) resolveDefaultProfile(ctx, cfg) @@ -262,6 +265,8 @@ func MustWorkspaceClient(cmd *cobra.Command, args []string) error { ctx = cmdctx.SetConfigUsed(cmd.Context(), cfg) cmd.SetContext(ctx) + hostmetadata.Attach(ctx, cfg) + // Try to load a bundle configuration if we're allowed to by the caller (see `./auth_options.go`). if !shouldSkipLoadBundle(cmd.Context()) { b := TryConfigureBundle(cmd) From 6d9981c5304b4d1b5f1d3d021613605c68c8e4dd Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 11:29:53 +0200 Subject: [PATCH 07/25] Isolate CLI cache dir in test cleanup environment --- internal/testutil/env.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/testutil/env.go b/internal/testutil/env.go index 2033d4fc83..abf577ece0 100644 --- a/internal/testutil/env.go +++ b/internal/testutil/env.go @@ -27,6 +27,9 @@ func CleanupEnvironment(t TestingT) { if runtime.GOOS == "windows" { t.Setenv("USERPROFILE", pwd) } + // Isolate the CLI cache (host metadata, user cache) so tests don't leak + // cache files into HOME (which CleanupEnvironment rebinds to pwd). + t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) } // NullEnvironment sets up an empty environment with absolutely no environment variables set. 
From b23299acebf752b5097bb64d786746ce9ef7c4f8 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 12:23:00 +0200 Subject: [PATCH 08/25] Refactor hostmetadata.Attach to drop ctx requirement --- cmd/root/auth.go | 4 ++-- libs/hostmetadata/resolver.go | 25 +++++++++++++++++-------- libs/hostmetadata/resolver_test.go | 21 ++++++++------------- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/cmd/root/auth.go b/cmd/root/auth.go index cad0cde16b..c040f53fd9 100644 --- a/cmd/root/auth.go +++ b/cmd/root/auth.go @@ -157,7 +157,7 @@ func MustAccountClient(cmd *cobra.Command, args []string) error { ctx = cmdctx.SetConfigUsed(ctx, cfg) cmd.SetContext(ctx) - hostmetadata.Attach(ctx, cfg) + hostmetadata.Attach(cfg) profiler := profile.GetProfiler(ctx) @@ -265,7 +265,7 @@ func MustWorkspaceClient(cmd *cobra.Command, args []string) error { ctx = cmdctx.SetConfigUsed(cmd.Context(), cfg) cmd.SetContext(ctx) - hostmetadata.Attach(ctx, cfg) + hostmetadata.Attach(cfg) // Try to load a bundle configuration if we're allowed to by the caller (see `./auth_options.go`). if !shouldSkipLoadBundle(cmd.Context()) { diff --git a/libs/hostmetadata/resolver.go b/libs/hostmetadata/resolver.go index a2b8661334..a4848018ec 100644 --- a/libs/hostmetadata/resolver.go +++ b/libs/hostmetadata/resolver.go @@ -5,6 +5,7 @@ package hostmetadata import ( "context" "errors" + "sync" "time" "github.com/databricks/cli/libs/cache" @@ -34,19 +35,27 @@ type negativeSentinel struct { Message string `json:"message"` } -// Attach creates caching wrappers for positive and negative host-metadata -// results and installs them on cfg. -func Attach(ctx context.Context, cfg *config.Config) { - positive := cache.NewCache(ctx, positiveCacheComponent, positiveCacheTTL, nil) - negative := cache.NewCache(ctx, negativeCacheComponent, negativeCacheTTL, nil) - cfg.HostMetadataResolver = newResolver(cfg, positive, negative) +// Attach installs a caching HostMetadataResolver on cfg. 
The underlying +// positive and negative caches are created lazily on the first resolver +// invocation, using the ctx the SDK passes into Resolve. +func Attach(cfg *config.Config) { + cfg.HostMetadataResolver = newResolver(cfg) } // newResolver returns a HostMetadataResolver that consults the negative cache // before hitting the positive cache, and records failed fetches so subsequent -// calls within negativeCacheTTL skip the network entirely. -func newResolver(cfg *config.Config, positive, negative *cache.Cache) config.HostMetadataResolver { +// calls within negativeCacheTTL skip the network entirely. The positive and +// negative caches are created lazily on the first invocation. +func newResolver(cfg *config.Config) config.HostMetadataResolver { + var ( + once sync.Once + positive, negative *cache.Cache + ) return func(ctx context.Context, host string) (*config.HostMetadata, error) { + once.Do(func() { + positive = cache.NewCache(ctx, positiveCacheComponent, positiveCacheTTL, nil) + negative = cache.NewCache(ctx, negativeCacheComponent, negativeCacheTTL, nil) + }) fp := hostFingerprint{Host: host} // Check negative cache first. 
errNotCached makes GetOrCompute skip the diff --git a/libs/hostmetadata/resolver_test.go b/libs/hostmetadata/resolver_test.go index 170f11ca91..18f037f903 100644 --- a/libs/hostmetadata/resolver_test.go +++ b/libs/hostmetadata/resolver_test.go @@ -14,18 +14,16 @@ import ( func TestAttach_SetsResolverOnConfig(t *testing.T) { t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) - ctx := t.Context() cfg := &config.Config{Host: "https://example.cloud.databricks.com"} require.Nil(t, cfg.HostMetadataResolver) - hostmetadata.Attach(ctx, cfg) + hostmetadata.Attach(cfg) assert.NotNil(t, cfg.HostMetadataResolver) } func TestCachingResolver_CacheMiss_DelegatesToSDKFetch(t *testing.T) { t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) - ctx := t.Context() var hits atomic.Int32 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -39,7 +37,7 @@ func TestCachingResolver_CacheMiss_DelegatesToSDKFetch(t *testing.T) { t.Cleanup(server.Close) cfg := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} - hostmetadata.Attach(ctx, cfg) + hostmetadata.Attach(cfg) require.NoError(t, cfg.EnsureResolved()) assert.Equal(t, "acct-1", cfg.AccountID) @@ -48,7 +46,6 @@ func TestCachingResolver_CacheMiss_DelegatesToSDKFetch(t *testing.T) { func TestCachingResolver_CacheHit_SkipsSDKFetch(t *testing.T) { t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) - ctx := t.Context() var hits atomic.Int32 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -62,12 +59,12 @@ func TestCachingResolver_CacheHit_SkipsSDKFetch(t *testing.T) { t.Cleanup(server.Close) cfg1 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} - hostmetadata.Attach(ctx, cfg1) + hostmetadata.Attach(cfg1) require.NoError(t, cfg1.EnsureResolved()) require.Equal(t, int32(1), hits.Load()) cfg2 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} - hostmetadata.Attach(ctx, 
cfg2) + hostmetadata.Attach(cfg2) require.NoError(t, cfg2.EnsureResolved()) assert.Equal(t, "acct-1", cfg2.AccountID) @@ -76,7 +73,6 @@ func TestCachingResolver_CacheHit_SkipsSDKFetch(t *testing.T) { func TestCachingResolver_FetchError_CachesNegative(t *testing.T) { t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) - ctx := t.Context() var hits atomic.Int32 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -90,14 +86,14 @@ func TestCachingResolver_FetchError_CachesNegative(t *testing.T) { t.Cleanup(server.Close) cfg1 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} - hostmetadata.Attach(ctx, cfg1) + hostmetadata.Attach(cfg1) require.NoError(t, cfg1.EnsureResolved(), "fetch error must be non-fatal") firstHits := hits.Load() require.GreaterOrEqual(t, firstHits, int32(1), "first resolve must have hit the server") cfg2 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} - hostmetadata.Attach(ctx, cfg2) + hostmetadata.Attach(cfg2) require.NoError(t, cfg2.EnsureResolved(), "fetch error must stay non-fatal with negative cache hit") assert.Equal(t, firstHits, hits.Load(), "negative cache must prevent subsequent fetches") @@ -105,7 +101,6 @@ func TestCachingResolver_FetchError_CachesNegative(t *testing.T) { func TestCachingResolver_DifferentHosts_SeparateEntries(t *testing.T) { t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) - ctx := t.Context() respond := func(accountID string) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { @@ -123,8 +118,8 @@ func TestCachingResolver_DifferentHosts_SeparateEntries(t *testing.T) { cfgA := &config.Config{Host: serverA.URL, Token: "x", Credentials: config.PatCredentials{}} cfgB := &config.Config{Host: serverB.URL, Token: "x", Credentials: config.PatCredentials{}} - hostmetadata.Attach(ctx, cfgA) - hostmetadata.Attach(ctx, cfgB) + hostmetadata.Attach(cfgA) + hostmetadata.Attach(cfgB) require.NoError(t, 
cfgA.EnsureResolved()) require.NoError(t, cfgB.EnsureResolved()) From 609f04431c4c92c27457973145addfea9f94564b Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 12:27:44 +0200 Subject: [PATCH 09/25] Attach hostmetadata cache to bundle workspace config --- bundle/config/workspace.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bundle/config/workspace.go b/bundle/config/workspace.go index c699dc070b..4009c8f3d9 100644 --- a/bundle/config/workspace.go +++ b/bundle/config/workspace.go @@ -6,6 +6,7 @@ import ( "github.com/databricks/cli/libs/auth" "github.com/databricks/cli/libs/databrickscfg" + "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/config" "github.com/databricks/databricks-sdk-go/marshal" @@ -163,6 +164,7 @@ func (w *Workspace) Client() (*databricks.WorkspaceClient, error) { w.NormalizeHostURL() cfg := w.Config() + hostmetadata.Attach(cfg) // If only the host is configured, we try and unambiguously match it to // a profile in the user's databrickscfg file. Override the default loaders. 
From 62e71f2c1d350889fe4947e4126647edbeb4d261 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 12:35:47 +0200 Subject: [PATCH 10/25] Attach hostmetadata cache at remaining config construction sites --- cmd/api/api.go | 3 +++ cmd/auth/env.go | 2 ++ cmd/auth/login.go | 2 ++ cmd/auth/profiles.go | 2 ++ cmd/labs/project/entrypoint.go | 7 +++++-- cmd/root/auth.go | 8 ++++++-- libs/auth/arguments.go | 2 ++ 7 files changed, 22 insertions(+), 4 deletions(-) diff --git a/cmd/api/api.go b/cmd/api/api.go index 057c8f2246..28f856c0ac 100644 --- a/cmd/api/api.go +++ b/cmd/api/api.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/flags" + "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/databricks-sdk-go/client" "github.com/databricks/databricks-sdk-go/config" "github.com/spf13/cobra" @@ -55,6 +56,8 @@ func makeCommand(method string) *cobra.Command { cfg.Profile = profileFlag.Value.String() } + hostmetadata.Attach(cfg) + api, err := client.New(cfg) if err != nil { return err diff --git a/cmd/auth/env.go b/cmd/auth/env.go index 11149af8c0..ddaca011db 100644 --- a/cmd/auth/env.go +++ b/cmd/auth/env.go @@ -11,6 +11,7 @@ import ( "strings" "github.com/databricks/cli/libs/databrickscfg/profile" + "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/databricks-sdk-go/config" "github.com/spf13/cobra" "gopkg.in/ini.v1" @@ -107,6 +108,7 @@ func newEnvCommand() *cobra.Command { Host: host, Profile: profile, } + hostmetadata.Attach(cfg) if profile != "" { cfg.Profile = profile } else if cfg.Host == "" { diff --git a/cmd/auth/login.go b/cmd/auth/login.go index afcf967ab9..c2dc0872ad 100644 --- a/cmd/auth/login.go +++ b/cmd/auth/login.go @@ -17,6 +17,7 @@ import ( "github.com/databricks/cli/libs/databrickscfg/profile" "github.com/databricks/cli/libs/env" "github.com/databricks/cli/libs/exec" + "github.com/databricks/cli/libs/hostmetadata" 
"github.com/databricks/cli/libs/log" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/config" @@ -472,6 +473,7 @@ func runHostDiscovery(ctx context.Context, authArguments *auth.AuthArguments) { // loading to avoid interference from existing profiles. Loaders: []config.Loader{config.ConfigAttributes}, } + hostmetadata.Attach(cfg) err := cfg.EnsureResolved() if err != nil { diff --git a/cmd/auth/profiles.go b/cmd/auth/profiles.go index 51c397a9ea..f6d5114824 100644 --- a/cmd/auth/profiles.go +++ b/cmd/auth/profiles.go @@ -13,6 +13,7 @@ import ( "github.com/databricks/cli/libs/databrickscfg" "github.com/databricks/cli/libs/databrickscfg/profile" "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/cli/libs/log" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/config" @@ -42,6 +43,7 @@ func (c *profileMetadata) Load(ctx context.Context, configFilePath string, skipV Profile: c.Name, DatabricksCliPath: env.Get(ctx, "DATABRICKS_CLI_PATH"), } + hostmetadata.Attach(cfg) _ = cfg.EnsureResolved() if cfg.IsAws() { c.Cloud = "aws" diff --git a/cmd/labs/project/entrypoint.go b/cmd/labs/project/entrypoint.go index 335f7c1301..685c916df7 100644 --- a/cmd/labs/project/entrypoint.go +++ b/cmd/labs/project/entrypoint.go @@ -14,6 +14,7 @@ import ( "github.com/databricks/cli/internal/build" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/cli/libs/log" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/config" @@ -146,14 +147,16 @@ func (e *Entrypoint) envAwareConfig(ctx context.Context) (*config.Config, error) if err != nil { return nil, err } - return &config.Config{ + cfg := &config.Config{ ConfigFile: filepath.Join(home, ".databrickscfg"), Loaders: []config.Loader{ env.NewConfigLoader(ctx), config.ConfigAttributes, config.ConfigFile, 
}, - }, nil + } + hostmetadata.Attach(cfg) + return cfg, nil } func (e *Entrypoint) envAwareConfigWithProfile(ctx context.Context, profile string) (*config.Config, error) { diff --git a/cmd/root/auth.go b/cmd/root/auth.go index c040f53fd9..8011e8f64f 100644 --- a/cmd/root/auth.go +++ b/cmd/root/auth.go @@ -111,7 +111,9 @@ func accountClientOrPrompt(ctx context.Context, cfg *config.Config, allowPrompt if err != nil { return nil, err } - a, err = databricks.NewAccountClient(&databricks.Config{Profile: profile}) + promptCfg := &databricks.Config{Profile: profile} + hostmetadata.Attach((*config.Config)(promptCfg)) + a, err = databricks.NewAccountClient(promptCfg) if err == nil { err = a.Config.Authenticate(emptyHttpRequest(ctx)) if err != nil { @@ -232,7 +234,9 @@ func workspaceClientOrPrompt(ctx context.Context, cfg *config.Config, allowPromp if err != nil { return nil, err } - w, err = databricks.NewWorkspaceClient(&databricks.Config{Profile: profile}) + promptCfg := &databricks.Config{Profile: profile} + hostmetadata.Attach((*config.Config)(promptCfg)) + w, err = databricks.NewWorkspaceClient(promptCfg) if err == nil { err = w.Config.Authenticate(emptyHttpRequest(ctx)) if err != nil { diff --git a/libs/auth/arguments.go b/libs/auth/arguments.go index 4f724cc801..32f19884d6 100644 --- a/libs/auth/arguments.go +++ b/libs/auth/arguments.go @@ -3,6 +3,7 @@ package auth import ( "strings" + "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/databricks-sdk-go/config" "github.com/databricks/databricks-sdk-go/credentials/u2m" ) @@ -48,6 +49,7 @@ func (a AuthArguments) ToOAuthArgument() (u2m.OAuthArgument, error) { // based on the explicit fields provided. 
Loaders: []config.Loader{config.ConfigAttributes}, } + hostmetadata.Attach(cfg) if a.DiscoveryURL != "" { cfg.DiscoveryURL = a.DiscoveryURL From 50a27ca3332bdb0e2bc1693fd76a25bd142be386 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 12:45:18 +0200 Subject: [PATCH 11/25] Replace sync.Once with eager cache init in hostmetadata.Attach --- libs/hostmetadata/resolver.go | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/libs/hostmetadata/resolver.go b/libs/hostmetadata/resolver.go index a4848018ec..4af21381b9 100644 --- a/libs/hostmetadata/resolver.go +++ b/libs/hostmetadata/resolver.go @@ -5,7 +5,6 @@ package hostmetadata import ( "context" "errors" - "sync" "time" "github.com/databricks/cli/libs/cache" @@ -35,27 +34,23 @@ type negativeSentinel struct { Message string `json:"message"` } -// Attach installs a caching HostMetadataResolver on cfg. The underlying -// positive and negative caches are created lazily on the first resolver -// invocation, using the ctx the SDK passes into Resolve. +// Attach installs a caching HostMetadataResolver on cfg. func Attach(cfg *config.Config) { - cfg.HostMetadataResolver = newResolver(cfg) + // cache.NewCache uses ctx only for env lookups and cleanup-walk debug + // logs; there is no cancellation signal to propagate. Using a background + // context keeps Attach callable from sites without a caller ctx in scope + // (e.g. bundle.Workspace.Client). + ctx := context.Background() //nolint:gocritic // Attach has no caller ctx and cache.NewCache does not use ctx for cancellation. 
+ positive := cache.NewCache(ctx, positiveCacheComponent, positiveCacheTTL, nil) + negative := cache.NewCache(ctx, negativeCacheComponent, negativeCacheTTL, nil) + cfg.HostMetadataResolver = newResolver(cfg, positive, negative) } // newResolver returns a HostMetadataResolver that consults the negative cache // before hitting the positive cache, and records failed fetches so subsequent -// calls within negativeCacheTTL skip the network entirely. The positive and -// negative caches are created lazily on the first invocation. -func newResolver(cfg *config.Config) config.HostMetadataResolver { - var ( - once sync.Once - positive, negative *cache.Cache - ) +// calls within negativeCacheTTL skip the network entirely. +func newResolver(cfg *config.Config, positive, negative *cache.Cache) config.HostMetadataResolver { return func(ctx context.Context, host string) (*config.HostMetadata, error) { - once.Do(func() { - positive = cache.NewCache(ctx, positiveCacheComponent, positiveCacheTTL, nil) - negative = cache.NewCache(ctx, negativeCacheComponent, negativeCacheTTL, nil) - }) fp := hostFingerprint{Host: host} // Check negative cache first. errNotCached makes GetOrCompute skip the From 4937738f4e77073c978ea35aae1ea2b8624e276f Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 12:46:32 +0200 Subject: [PATCH 12/25] Attach hostmetadata cache at two missing login construction sites --- cmd/auth/login.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cmd/auth/login.go b/cmd/auth/login.go index c2dc0872ad..0ca0124504 100644 --- a/cmd/auth/login.go +++ b/cmd/auth/login.go @@ -297,13 +297,15 @@ a new profile is created. // Create a workspace client to list clusters for interactive selection. // We use a custom CredentialsStrategy that wraps the token we just minted, // avoiding the need to spawn a child CLI process (which AuthType "databricks-cli" does). 
- w, err := databricks.NewWorkspaceClient(&databricks.Config{ + clusterCfg := &databricks.Config{ Host: authArguments.Host, AccountID: authArguments.AccountID, WorkspaceID: authArguments.WorkspaceID, Experimental_IsUnifiedHost: authArguments.IsUnifiedHost, Credentials: config.NewTokenSourceStrategy("login-token", authconv.AuthTokenSource(persistentAuth)), - }) + } + hostmetadata.Attach((*config.Config)(clusterCfg)) + w, err := databricks.NewWorkspaceClient(clusterCfg) if err != nil { return err } @@ -723,11 +725,13 @@ func promptForWorkspaceSelection(ctx context.Context, authArguments *auth.AuthAr return "", nil } - a, err := databricks.NewAccountClient(&databricks.Config{ + selectCfg := &databricks.Config{ Host: authArguments.Host, AccountID: authArguments.AccountID, Credentials: config.NewTokenSourceStrategy("login-token", authconv.AuthTokenSource(persistentAuth)), - }) + } + hostmetadata.Attach((*config.Config)(selectCfg)) + a, err := databricks.NewAccountClient(selectCfg) if err != nil { return "", err } From 2c9b1f33849348d840ecca103259f9ee0c2f7cc3 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 12:50:44 +0200 Subject: [PATCH 13/25] Add guardrail test for hostmetadata.Attach injection sites --- libs/hostmetadata/injection_guardrail_test.go | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 libs/hostmetadata/injection_guardrail_test.go diff --git a/libs/hostmetadata/injection_guardrail_test.go b/libs/hostmetadata/injection_guardrail_test.go new file mode 100644 index 0000000000..b610247d17 --- /dev/null +++ b/libs/hostmetadata/injection_guardrail_test.go @@ -0,0 +1,114 @@ +package hostmetadata_test + +import ( + "io/fs" + "os" + "path/filepath" + "regexp" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// allowlist maps repo-relative paths (forward slashes) to a short reason the +// site doesn't need hostmetadata.Attach. 
When you add a new entry, write a + // specific reason — "no resolution" is too vague; say "SaveToProfile: write-only". +var allowlist = map[string]string{ + "cmd/auth/auth.go": "CanonicalHostName only (URL munging)", + "cmd/auth/resolve.go": "CanonicalHostName only", + "cmd/auth/logout.go": "CanonicalHostName only", + "cmd/auth/token.go": "SaveToProfile: write-only", + "cmd/configure/configure.go": "SaveToProfile: write-only", + "libs/databrickscfg/profile/profile.go": "CanonicalHostName only", + "libs/databrickscfg/profile/profiler.go": "CanonicalHostName only", + "libs/testproxy/server.go": "test helper, no real auth", + "acceptance/internal/prepare_server.go": "acceptance test infrastructure", + "libs/env/loader.go": "doc comment only, no struct construction", + // Task 6 deliberately skipped these two sites: + // cmd/auth/login.go:setHostAndAccountId (used for HostType() pattern matching only) + // cmd/root/auth.go:~290 (cfg reassigned from already-resolved client) + // Both are in files that ALSO contain Attach calls, so they don't appear + // in this allowlist — the file-level "has Attach" check covers them. +} + +// constructionPattern matches both `config.Config{` and `databricks.Config{` +// struct literals — the two forms we construct in this repo. +var constructionPattern = regexp.MustCompile(`\b(?:config|databricks)\.Config\{`) + +func TestConfigConstructionSitesHaveAttach(t *testing.T) { + repoRoot := findRepoRoot(t) + + var offenders []string + err := filepath.WalkDir(repoRoot, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + // Skip: .git, vendor, .claude (worktrees), node_modules. + name := d.Name() + if name == ".git" || name == "vendor" || name == ".claude" || name == "node_modules" { + return fs.SkipDir + } + return nil + } + if !strings.HasSuffix(path, ".go") { + return nil + } + // Skip test files — we only want production code. 
+ if strings.HasSuffix(path, "_test.go") { + return nil + } + + rel, err := filepath.Rel(repoRoot, path) + if err != nil { + return err + } + relSlash := filepath.ToSlash(rel) + + // Allowlist check: if the file is explicitly allowlisted, skip. + if _, ok := allowlist[relSlash]; ok { + return nil + } + + src, err := os.ReadFile(path) + if err != nil { + return err + } + content := string(src) + + if !constructionPattern.MatchString(content) { + return nil + } + if strings.Contains(content, "hostmetadata.Attach(") { + return nil + } + + offenders = append(offenders, relSlash) + return nil + }) + require.NoError(t, err) + + assert.Empty(t, offenders, + "the following files construct *config.Config but do not call hostmetadata.Attach. "+ + "Either add `hostmetadata.Attach(cfg)` before the first resolve, "+ + "or add the file to the allowlist in %s with a specific reason.", + "libs/hostmetadata/injection_guardrail_test.go") +} + +// findRepoRoot walks up from the test's working directory until it finds go.mod. +func findRepoRoot(t *testing.T) string { + dir, err := os.Getwd() + require.NoError(t, err) + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + t.Fatal("could not find go.mod walking up from " + dir) + } + dir = parent + } +} From 1a179c5395c44c1c1f9ae9109c31b2e0019edd67 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 13:01:10 +0200 Subject: [PATCH 14/25] Add end-to-end acceptance test for host metadata caching Verifies that two CLI invocations sharing DATABRICKS_CACHE_DIR produce only one /.well-known/databricks-config GET: the first populates the on-disk cache, the second reads from it. 
Co-authored-by: Isaac --- .../auth/host-metadata-cache/out.test.toml | 5 +++ .../auth/host-metadata-cache/output.txt | 32 +++++++++++++++++++ acceptance/auth/host-metadata-cache/script | 19 +++++++++++ acceptance/auth/host-metadata-cache/test.toml | 5 +++ 4 files changed, 61 insertions(+) create mode 100644 acceptance/auth/host-metadata-cache/out.test.toml create mode 100644 acceptance/auth/host-metadata-cache/output.txt create mode 100644 acceptance/auth/host-metadata-cache/script create mode 100644 acceptance/auth/host-metadata-cache/test.toml diff --git a/acceptance/auth/host-metadata-cache/out.test.toml b/acceptance/auth/host-metadata-cache/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/auth/host-metadata-cache/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/auth/host-metadata-cache/output.txt b/acceptance/auth/host-metadata-cache/output.txt new file mode 100644 index 0000000000..266c9fa93e --- /dev/null +++ b/acceptance/auth/host-metadata-cache/output.txt @@ -0,0 +1,32 @@ + +=== First invocation populates the cache +{ + "profiles": [ + { + "name":"cached", + "host":"[DATABRICKS_URL]", + "cloud":"aws", + "auth_type":"", + "valid":false + } + ] +} + +=== Second invocation should read from the cache +{ + "profiles": [ + { + "name":"cached", + "host":"[DATABRICKS_URL]", + "cloud":"aws", + "auth_type":"", + "valid":false + } + ] +} + +=== Only one /.well-known/databricks-config request recorded +{ + "method": "GET", + "path": "/.well-known/databricks-config" +} diff --git a/acceptance/auth/host-metadata-cache/script b/acceptance/auth/host-metadata-cache/script new file mode 100644 index 0000000000..2f88c987e4 --- /dev/null +++ b/acceptance/auth/host-metadata-cache/script @@ -0,0 +1,19 @@ +sethome "./home" +export DATABRICKS_CACHE_DIR="$TEST_TMP_DIR/cache" + +# Point a profile at the mock server so auth profiles 
triggers a host metadata +# fetch. Without a profile the command does nothing and the cache is never read. +cat > "./home/.databrickscfg" < Date: Fri, 17 Apr 2026 13:46:01 +0200 Subject: [PATCH 15/25] Regenerate acceptance test expectations for hostmetadata caching Cached /.well-known/databricks-config lookups persist across CLI invocations now, so recorded request logs drop duplicate GETs and debug output shows the new host-metadata cache keys. Silenced SDK warnings on failed well-known fetches (the resolver returns nil,nil) also remove a couple of Warn lines from auth test outputs. Co-authored-by: Isaac --- acceptance/auth/bundle_and_profile/output.txt | 2 -- .../credentials/unified-host/out.requests.txt | 9 -------- .../change-schema-name/out.requests.txt | 8 ------- .../from_flag/out.requests.txt | 4 ---- .../target-is-passed/default/out.requests.txt | 4 ---- .../from_flag/out.requests.txt | 4 ---- acceptance/cache/clear/output.txt | 22 +++++++++++++++++++ acceptance/cache/simple/output.txt | 14 ++++++++++++ acceptance/cmd/auth/profiles/output.txt | 1 - .../cmd/workspace/apps/out.requests.txt | 4 ---- acceptance/telemetry/failure/output.txt | 8 +++++++ .../telemetry/partial-success/output.txt | 8 +++++++ acceptance/telemetry/skipped/output.txt | 8 +++++++ acceptance/telemetry/success/output.txt | 8 +++++++ acceptance/telemetry/test.toml | 8 +++++++ acceptance/telemetry/timeout/output.txt | 8 +++++++ .../lakeview/publish/out.requests.txt | 8 ------- .../create_with_provider/out.requests.txt | 12 ---------- .../repos/delete_by_path/out.requests.txt | 12 ---------- .../repos/get_errors/out.requests.txt | 8 ------- .../workspace/repos/update/out.requests.txt | 16 -------------- 21 files changed, 84 insertions(+), 92 deletions(-) diff --git a/acceptance/auth/bundle_and_profile/output.txt b/acceptance/auth/bundle_and_profile/output.txt index 88deef1256..5501ab5a6e 100644 --- a/acceptance/auth/bundle_and_profile/output.txt +++ 
b/acceptance/auth/bundle_and_profile/output.txt @@ -13,7 +13,6 @@ === Inside the bundle, profile flag not matching bundle host. Should use profile from the flag and not the bundle. >>> errcode [CLI] current-user me -p profile_name -Warn: Failed to resolve host metadata: (redacted). Falling back to user config. Error: Get "https://non.existing.subdomain.databricks.com/api/2.0/preview/scim/v2/Me": (redacted) Exit code: 1 @@ -73,7 +72,6 @@ Validation OK! === Bundle commands load bundle configuration with -t and -p flag, validation not OK (profile host don't match bundle host) >>> errcode [CLI] bundle validate -t prod -p DEFAULT -Warn: Failed to resolve host metadata: (redacted). Falling back to user config. Error: cannot resolve bundle auth configuration: the host in the profile ([DATABRICKS_TARGET]) doesn’t match the host configured in the bundle (https://bar.com) Name: test-auth diff --git a/acceptance/auth/credentials/unified-host/out.requests.txt b/acceptance/auth/credentials/unified-host/out.requests.txt index e94814526d..c154a54bff 100644 --- a/acceptance/auth/credentials/unified-host/out.requests.txt +++ b/acceptance/auth/credentials/unified-host/out.requests.txt @@ -22,15 +22,6 @@ "method": "GET", "path": "/api/2.0/preview/scim/v2/Me" } -{ - "headers": { - "User-Agent": [ - "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS]" - ] - }, - "method": "GET", - "path": "/.well-known/databricks-config" -} { "headers": { "Authorization": [ diff --git a/acceptance/bundle/resources/volumes/change-schema-name/out.requests.txt b/acceptance/bundle/resources/volumes/change-schema-name/out.requests.txt index 6356a3f868..0c03eb00fd 100644 --- a/acceptance/bundle/resources/volumes/change-schema-name/out.requests.txt +++ b/acceptance/bundle/resources/volumes/change-schema-name/out.requests.txt @@ -1,15 +1,7 @@ -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": 
"/api/2.1/unity-catalog/volumes/main.myschema.myvolume" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.1/unity-catalog/volumes/main.myschema.mynewvolume" diff --git a/acceptance/bundle/run/inline-script/databricks-cli/profile-is-passed/from_flag/out.requests.txt b/acceptance/bundle/run/inline-script/databricks-cli/profile-is-passed/from_flag/out.requests.txt index c5b36c8f9c..0c01e54a7b 100644 --- a/acceptance/bundle/run/inline-script/databricks-cli/profile-is-passed/from_flag/out.requests.txt +++ b/acceptance/bundle/run/inline-script/databricks-cli/profile-is-passed/from_flag/out.requests.txt @@ -16,10 +16,6 @@ "path": "/oidc/v1/token", "raw_body": "grant_type=client_credentials\u0026scope=all-apis" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/oidc/.well-known/oauth-authorization-server" diff --git a/acceptance/bundle/run/inline-script/databricks-cli/target-is-passed/default/out.requests.txt b/acceptance/bundle/run/inline-script/databricks-cli/target-is-passed/default/out.requests.txt index 7cf520fe0c..0c815411ae 100644 --- a/acceptance/bundle/run/inline-script/databricks-cli/target-is-passed/default/out.requests.txt +++ b/acceptance/bundle/run/inline-script/databricks-cli/target-is-passed/default/out.requests.txt @@ -2,10 +2,6 @@ "method": "GET", "path": "/.well-known/databricks-config" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "headers": { "Authorization": [ diff --git a/acceptance/bundle/run/inline-script/databricks-cli/target-is-passed/from_flag/out.requests.txt b/acceptance/bundle/run/inline-script/databricks-cli/target-is-passed/from_flag/out.requests.txt index 3a134bc775..6ba1aad254 100644 --- a/acceptance/bundle/run/inline-script/databricks-cli/target-is-passed/from_flag/out.requests.txt +++ b/acceptance/bundle/run/inline-script/databricks-cli/target-is-passed/from_flag/out.requests.txt @@ -16,10 +16,6 @@ "path": 
"/oidc/v1/token", "raw_body": "grant_type=client_credentials\u0026scope=all-apis" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/oidc/.well-known/oauth-authorization-server" diff --git a/acceptance/cache/clear/output.txt b/acceptance/cache/clear/output.txt index bba37b6ccb..dcd7496fd2 100644 --- a/acceptance/cache/clear/output.txt +++ b/acceptance/cache/clear/output.txt @@ -3,10 +3,24 @@ [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing +[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached +[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] +[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) +[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing +[DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result +[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] +[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) +[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result === Second call in a session is expected to be a cache hit [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] +[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) +[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing +[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached +[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] +[DEBUG_TIMESTAMP] Debug: [Local Cache] cache hit +[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache hit >>> [CLI] cache clear @@ -16,4 +30,12 @@ Cache cleared successfully from [TEST_TMP_DIR]/.cache [DEBUG_TIMESTAMP] Debug: [Local Cache] 
using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing +[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached +[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] +[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) +[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing +[DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result +[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] +[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) +[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result diff --git a/acceptance/cache/simple/output.txt b/acceptance/cache/simple/output.txt index 093900b94b..e9235ad409 100644 --- a/acceptance/cache/simple/output.txt +++ b/acceptance/cache/simple/output.txt @@ -3,10 +3,24 @@ [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing +[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached +[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] +[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) +[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing +[DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result +[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] +[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) +[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result === Second call in a session is expected to be a cache hit [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] 
+[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) +[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing +[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached +[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] +[DEBUG_TIMESTAMP] Debug: [Local Cache] cache hit +[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache hit === Bundle deploy should send telemetry values diff --git a/acceptance/cmd/auth/profiles/output.txt b/acceptance/cmd/auth/profiles/output.txt index 060da0eba5..a04bb2dfaa 100644 --- a/acceptance/cmd/auth/profiles/output.txt +++ b/acceptance/cmd/auth/profiles/output.txt @@ -1,6 +1,5 @@ === Profiles with workspace_id (JSON output) -Warn: Failed to resolve host metadata: fetching host metadata from "https://test.cloud.databricks.com/.well-known/databricks-config": Get "https://test.cloud.databricks.com/.well-known/databricks-config": dial tcp: lookup test.cloud.databricks.com: no such host. Falling back to user config. 
{ "profiles": [ { diff --git a/acceptance/cmd/workspace/apps/out.requests.txt b/acceptance/cmd/workspace/apps/out.requests.txt index 9962050b50..ba4cf8bd6e 100644 --- a/acceptance/cmd/workspace/apps/out.requests.txt +++ b/acceptance/cmd/workspace/apps/out.requests.txt @@ -25,10 +25,6 @@ "method": "GET", "path": "/api/2.0/apps/test-name" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "PATCH", "path": "/api/2.0/apps/test-name", diff --git a/acceptance/telemetry/failure/output.txt b/acceptance/telemetry/failure/output.txt index af0c34a13e..ebc7d4fd27 100644 --- a/acceptance/telemetry/failure/output.txt +++ b/acceptance/telemetry/failure/output.txt @@ -1,12 +1,20 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID +HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { < "oidc_endpoint": "[DATABRICKS_URL]/oidc", < "workspace_id": "[NUMID]" < } pid=PID sdk=true +HH:MM:SS Debug: [Local Cache] computed and stored result pid=PID HH:MM:SS Debug: Resolved workspace_id from host metadata: "[NUMID]" pid=PID sdk=true HH:MM:SS Debug: Resolved cloud from hostname: "AWS" pid=PID sdk=true HH:MM:SS Debug: Resolved discovery_url 
from host metadata: "[DATABRICKS_URL]/oidc/.well-known/oauth-authorization-server" pid=PID sdk=true diff --git a/acceptance/telemetry/partial-success/output.txt b/acceptance/telemetry/partial-success/output.txt index 113dc11b66..7514fcd43f 100644 --- a/acceptance/telemetry/partial-success/output.txt +++ b/acceptance/telemetry/partial-success/output.txt @@ -1,12 +1,20 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID +HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { < "oidc_endpoint": "[DATABRICKS_URL]/oidc", < "workspace_id": "[NUMID]" < } pid=PID sdk=true +HH:MM:SS Debug: [Local Cache] computed and stored result pid=PID HH:MM:SS Debug: Resolved workspace_id from host metadata: "[NUMID]" pid=PID sdk=true HH:MM:SS Debug: Resolved cloud from hostname: "AWS" pid=PID sdk=true HH:MM:SS Debug: Resolved discovery_url from host metadata: "[DATABRICKS_URL]/oidc/.well-known/oauth-authorization-server" pid=PID sdk=true diff --git a/acceptance/telemetry/skipped/output.txt b/acceptance/telemetry/skipped/output.txt index e85ce380a4..e4a5d7ed0e 100644 --- a/acceptance/telemetry/skipped/output.txt +++ b/acceptance/telemetry/skipped/output.txt @@ -1,12 
+1,20 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID +HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { < "oidc_endpoint": "[DATABRICKS_URL]/oidc", < "workspace_id": "[NUMID]" < } pid=PID sdk=true +HH:MM:SS Debug: [Local Cache] computed and stored result pid=PID HH:MM:SS Debug: Resolved workspace_id from host metadata: "[NUMID]" pid=PID sdk=true HH:MM:SS Debug: Resolved cloud from hostname: "AWS" pid=PID sdk=true HH:MM:SS Debug: Resolved discovery_url from host metadata: "[DATABRICKS_URL]/oidc/.well-known/oauth-authorization-server" pid=PID sdk=true diff --git a/acceptance/telemetry/success/output.txt b/acceptance/telemetry/success/output.txt index f3b410f765..fa17ff9d70 100644 --- a/acceptance/telemetry/success/output.txt +++ b/acceptance/telemetry/success/output.txt @@ -1,12 +1,20 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat 
[TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID +HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { < "oidc_endpoint": "[DATABRICKS_URL]/oidc", < "workspace_id": "[NUMID]" < } pid=PID sdk=true +HH:MM:SS Debug: [Local Cache] computed and stored result pid=PID HH:MM:SS Debug: Resolved workspace_id from host metadata: "[NUMID]" pid=PID sdk=true HH:MM:SS Debug: Resolved cloud from hostname: "AWS" pid=PID sdk=true HH:MM:SS Debug: Resolved discovery_url from host metadata: "[DATABRICKS_URL]/oidc/.well-known/oauth-authorization-server" pid=PID sdk=true diff --git a/acceptance/telemetry/test.toml b/acceptance/telemetry/test.toml index 574ffd3ce1..32660c3b81 100644 --- a/acceptance/telemetry/test.toml +++ b/acceptance/telemetry/test.toml @@ -36,3 +36,11 @@ New = "pid=PID" [[Repls]] Old = "\\([0-9]+ more bytes\\)" New = "(N more bytes)" + +# Host metadata cache keys vary per-test because the mock server URL changes. +# Normalize them so the golden output is stable across runs. +# Order=1 so it runs before the parent's `\d{14,}` → `[NUMID]` replacement. 
+[[Repls]] +Old = '[a-f0-9]{64}' +New = "[SHA256_HASH]" +Order = 1 diff --git a/acceptance/telemetry/timeout/output.txt b/acceptance/telemetry/timeout/output.txt index a124cc72b6..11aeef2ebd 100644 --- a/acceptance/telemetry/timeout/output.txt +++ b/acceptance/telemetry/timeout/output.txt @@ -1,12 +1,20 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID +HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID +HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { < "oidc_endpoint": "[DATABRICKS_URL]/oidc", < "workspace_id": "[NUMID]" < } pid=PID sdk=true +HH:MM:SS Debug: [Local Cache] computed and stored result pid=PID HH:MM:SS Debug: Resolved workspace_id from host metadata: "[NUMID]" pid=PID sdk=true HH:MM:SS Debug: Resolved cloud from hostname: "AWS" pid=PID sdk=true HH:MM:SS Debug: Resolved discovery_url from host metadata: "[DATABRICKS_URL]/oidc/.well-known/oauth-authorization-server" pid=PID sdk=true diff --git a/acceptance/workspace/lakeview/publish/out.requests.txt b/acceptance/workspace/lakeview/publish/out.requests.txt index 4adba9b64a..e4802babc6 100644 --- a/acceptance/workspace/lakeview/publish/out.requests.txt +++ b/acceptance/workspace/lakeview/publish/out.requests.txt @@ -9,10 +9,6 @@ 
"path": "/Users/[USERNAME]" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "POST", "path": "/api/2.0/lakeview/dashboards", @@ -22,10 +18,6 @@ "warehouse_id": "test-warehouse" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "POST", "path": "/api/2.0/lakeview/dashboards/[DASHBOARD_ID]/published", diff --git a/acceptance/workspace/repos/create_with_provider/out.requests.txt b/acceptance/workspace/repos/create_with_provider/out.requests.txt index 73219c0a27..430eb33fe1 100644 --- a/acceptance/workspace/repos/create_with_provider/out.requests.txt +++ b/acceptance/workspace/repos/create_with_provider/out.requests.txt @@ -11,18 +11,10 @@ "url": "https://github.com/databricks/databricks-empty-ide-project.git" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/repos/[NUMID]" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/workspace/get-status", @@ -34,10 +26,6 @@ "method": "GET", "path": "/api/2.0/repos/[NUMID]" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "DELETE", "path": "/api/2.0/repos/[NUMID]" diff --git a/acceptance/workspace/repos/delete_by_path/out.requests.txt b/acceptance/workspace/repos/delete_by_path/out.requests.txt index f6857935ae..9fa6916971 100644 --- a/acceptance/workspace/repos/delete_by_path/out.requests.txt +++ b/acceptance/workspace/repos/delete_by_path/out.requests.txt @@ -11,10 +11,6 @@ "url": "https://github.com/databricks/databricks-empty-ide-project.git" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/workspace/get-status", @@ -26,10 +22,6 @@ "method": "GET", "path": "/api/2.0/repos/[NUMID]" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/workspace/get-status", @@ -41,10 +33,6 @@ "method": "DELETE", "path": 
"/api/2.0/repos/[NUMID]" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/workspace/get-status", diff --git a/acceptance/workspace/repos/get_errors/out.requests.txt b/acceptance/workspace/repos/get_errors/out.requests.txt index 24de0f3dd0..2bfe07b131 100644 --- a/acceptance/workspace/repos/get_errors/out.requests.txt +++ b/acceptance/workspace/repos/get_errors/out.requests.txt @@ -9,10 +9,6 @@ "path": "/Repos/me@databricks.com/doesnotexist" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "POST", "path": "/api/2.0/workspace/mkdirs", @@ -20,10 +16,6 @@ "path": "/not-a-repo" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/workspace/get-status", diff --git a/acceptance/workspace/repos/update/out.requests.txt b/acceptance/workspace/repos/update/out.requests.txt index ca982e372d..fa5a518dad 100644 --- a/acceptance/workspace/repos/update/out.requests.txt +++ b/acceptance/workspace/repos/update/out.requests.txt @@ -11,10 +11,6 @@ "url": "https://github.com/databricks/databricks-empty-ide-project.git" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "PATCH", "path": "/api/2.0/repos/[NUMID]", @@ -22,18 +18,10 @@ "branch": "update-by-id" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/repos/[NUMID]" } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/workspace/get-status", @@ -48,10 +36,6 @@ "branch": "update-by-path" } } -{ - "method": "GET", - "path": "/.well-known/databricks-config" -} { "method": "GET", "path": "/api/2.0/repos/[NUMID]" From 0ce5f13dd079a629a5c5bd5b5685e4f25a9482f1 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 13:51:08 +0200 Subject: [PATCH 16/25] Expose NewResolver to decouple caching from *config.Config Inverts the internal newResolver(cfg, 
...) into an exported NewResolver(fetch) that takes an injected fetch function. Attach stays as a one-liner convenience. Unit tests for the caching logic no longer need httptest servers or PAT-authed configs; one integration test retains the end-to-end SDK wiring. Co-authored-by: Isaac --- libs/hostmetadata/resolver.go | 29 ++++--- libs/hostmetadata/resolver_test.go | 133 +++++++++++++---------------- 2 files changed, 76 insertions(+), 86 deletions(-) diff --git a/libs/hostmetadata/resolver.go b/libs/hostmetadata/resolver.go index 4af21381b9..e07cc8a17e 100644 --- a/libs/hostmetadata/resolver.go +++ b/libs/hostmetadata/resolver.go @@ -34,22 +34,19 @@ type negativeSentinel struct { Message string `json:"message"` } -// Attach installs a caching HostMetadataResolver on cfg. -func Attach(cfg *config.Config) { +// NewResolver returns a HostMetadataResolver that consults the negative cache +// before hitting the positive cache, and records failed fetches so subsequent +// calls within negativeCacheTTL skip the network entirely. The fetch function +// is invoked on positive cache miss, typically cfg.DefaultHostMetadataResolver(). +func NewResolver(fetch config.HostMetadataResolver) config.HostMetadataResolver { // cache.NewCache uses ctx only for env lookups and cleanup-walk debug // logs; there is no cancellation signal to propagate. Using a background - // context keeps Attach callable from sites without a caller ctx in scope - // (e.g. bundle.Workspace.Client). - ctx := context.Background() //nolint:gocritic // Attach has no caller ctx and cache.NewCache does not use ctx for cancellation. + // context keeps NewResolver callable from sites without a caller ctx + // in scope (e.g. bundle.Workspace.Client). + ctx := context.Background() //nolint:gocritic // no caller ctx and cache.NewCache does not use ctx for cancellation. 
positive := cache.NewCache(ctx, positiveCacheComponent, positiveCacheTTL, nil) negative := cache.NewCache(ctx, negativeCacheComponent, negativeCacheTTL, nil) - cfg.HostMetadataResolver = newResolver(cfg, positive, negative) -} -// newResolver returns a HostMetadataResolver that consults the negative cache -// before hitting the positive cache, and records failed fetches so subsequent -// calls within negativeCacheTTL skip the network entirely. -func newResolver(cfg *config.Config, positive, negative *cache.Cache) config.HostMetadataResolver { return func(ctx context.Context, host string) (*config.HostMetadata, error) { fp := hostFingerprint{Host: host} @@ -63,9 +60,9 @@ func newResolver(cfg *config.Config, positive, negative *cache.Cache) config.Hos return nil, nil } - // Positive cache: on miss, delegate to the SDK's default HTTP resolver. + // Positive cache: on miss, delegate to the injected fetch function. meta, err := cache.GetOrCompute[*config.HostMetadata](ctx, positive, fp, func(ctx context.Context) (*config.HostMetadata, error) { - return cfg.DefaultHostMetadataResolver()(ctx, host) + return fetch(ctx, host) }) if err != nil { log.Debugf(ctx, "[hostmetadata] fetch failed for %s, recording negative: %v", host, err) @@ -79,3 +76,9 @@ func newResolver(cfg *config.Config, positive, negative *cache.Cache) config.Hos return meta, nil } } + +// Attach installs a caching HostMetadataResolver on cfg, using the SDK's +// default HTTP resolver as the fetch function on cache miss. 
+func Attach(cfg *config.Config) { + cfg.HostMetadataResolver = NewResolver(cfg.DefaultHostMetadataResolver()) +} diff --git a/libs/hostmetadata/resolver_test.go b/libs/hostmetadata/resolver_test.go index 18f037f903..6c25b7dc04 100644 --- a/libs/hostmetadata/resolver_test.go +++ b/libs/hostmetadata/resolver_test.go @@ -1,6 +1,8 @@ package hostmetadata_test import ( + "context" + "errors" "net/http" "net/http/httptest" "sync/atomic" @@ -22,63 +24,78 @@ func TestAttach_SetsResolverOnConfig(t *testing.T) { assert.NotNil(t, cfg.HostMetadataResolver) } -func TestCachingResolver_CacheMiss_DelegatesToSDKFetch(t *testing.T) { +func TestNewResolver_CacheHit_SkipsFetch(t *testing.T) { t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) - var hits atomic.Int32 - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path == "/.well-known/databricks-config" { - hits.Add(1) - _, _ = w.Write([]byte(`{"oidc_endpoint":"https://example.com/oidc","account_id":"acct-1","cloud":"aws"}`)) - return - } - w.WriteHeader(http.StatusNotFound) - })) - t.Cleanup(server.Close) + var calls atomic.Int32 + fetch := func(ctx context.Context, host string) (*config.HostMetadata, error) { + calls.Add(1) + return &config.HostMetadata{AccountID: "acct-1"}, nil + } + r := hostmetadata.NewResolver(fetch) - cfg := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} - hostmetadata.Attach(cfg) - require.NoError(t, cfg.EnsureResolved()) + m1, err := r(t.Context(), "https://example") + require.NoError(t, err) + assert.Equal(t, "acct-1", m1.AccountID) + + m2, err := r(t.Context(), "https://example") + require.NoError(t, err) + assert.Equal(t, "acct-1", m2.AccountID) - assert.Equal(t, "acct-1", cfg.AccountID) - assert.Equal(t, int32(1), hits.Load()) + assert.Equal(t, int32(1), calls.Load(), "second call must be served from cache") } -func TestCachingResolver_CacheHit_SkipsSDKFetch(t *testing.T) { +func 
TestNewResolver_FetchError_CachesNegative(t *testing.T) { t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) - var hits atomic.Int32 - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path == "/.well-known/databricks-config" { - hits.Add(1) - _, _ = w.Write([]byte(`{"oidc_endpoint":"https://example.com/oidc","account_id":"acct-1","cloud":"aws"}`)) - return - } - w.WriteHeader(http.StatusNotFound) - })) - t.Cleanup(server.Close) + var calls atomic.Int32 + fetch := func(ctx context.Context, host string) (*config.HostMetadata, error) { + calls.Add(1) + return nil, errors.New("boom") + } + r := hostmetadata.NewResolver(fetch) - cfg1 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} - hostmetadata.Attach(cfg1) - require.NoError(t, cfg1.EnsureResolved()) - require.Equal(t, int32(1), hits.Load()) + m, err := r(t.Context(), "https://example") + require.NoError(t, err, "fetch errors must be swallowed (SDK sees (nil, nil) = no metadata)") + assert.Nil(t, m) - cfg2 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} - hostmetadata.Attach(cfg2) - require.NoError(t, cfg2.EnsureResolved()) + first := calls.Load() + require.GreaterOrEqual(t, first, int32(1)) - assert.Equal(t, "acct-1", cfg2.AccountID) - assert.Equal(t, int32(1), hits.Load(), "second EnsureResolved must not hit the server") + _, err = r(t.Context(), "https://example") + require.NoError(t, err) + assert.Equal(t, first, calls.Load(), "negative cache must skip the fetch") +} + +func TestNewResolver_DifferentHosts_SeparateEntries(t *testing.T) { + t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) + + fetch := func(ctx context.Context, host string) (*config.HostMetadata, error) { + return &config.HostMetadata{AccountID: "acct-for-" + host}, nil + } + r := hostmetadata.NewResolver(fetch) + + mA, err := r(t.Context(), "https://a") + require.NoError(t, err) + mB, err := r(t.Context(), "https://b") + 
require.NoError(t, err) + + assert.Equal(t, "acct-for-https://a", mA.AccountID) + assert.Equal(t, "acct-for-https://b", mB.AccountID) } -func TestCachingResolver_FetchError_CachesNegative(t *testing.T) { +// TestAttach_EndToEnd_CacheHitSkipsSDKFetch is an integration sanity check that +// the default fetch wiring through cfg.DefaultHostMetadataResolver() works: +// two independent *config.Config instances sharing DATABRICKS_CACHE_DIR must +// hit the well-known endpoint once, not twice. +func TestAttach_EndToEnd_CacheHitSkipsSDKFetch(t *testing.T) { t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) var hits atomic.Int32 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/.well-known/databricks-config" { hits.Add(1) - w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"oidc_endpoint":"https://example.com/oidc","account_id":"acct-1","cloud":"aws"}`)) return } w.WriteHeader(http.StatusNotFound) @@ -87,43 +104,13 @@ func TestCachingResolver_FetchError_CachesNegative(t *testing.T) { cfg1 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} hostmetadata.Attach(cfg1) - require.NoError(t, cfg1.EnsureResolved(), "fetch error must be non-fatal") - - firstHits := hits.Load() - require.GreaterOrEqual(t, firstHits, int32(1), "first resolve must have hit the server") + require.NoError(t, cfg1.EnsureResolved()) + require.Equal(t, int32(1), hits.Load()) cfg2 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} hostmetadata.Attach(cfg2) - require.NoError(t, cfg2.EnsureResolved(), "fetch error must stay non-fatal with negative cache hit") - - assert.Equal(t, firstHits, hits.Load(), "negative cache must prevent subsequent fetches") -} - -func TestCachingResolver_DifferentHosts_SeparateEntries(t *testing.T) { - t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) - - respond := func(accountID string) http.HandlerFunc { - return func(w 
http.ResponseWriter, r *http.Request) { - if r.URL.Path == "/.well-known/databricks-config" { - _, _ = w.Write([]byte(`{"oidc_endpoint":"https://example.com/oidc","account_id":"` + accountID + `","cloud":"aws"}`)) - return - } - w.WriteHeader(http.StatusNotFound) - } - } - serverA := httptest.NewServer(respond("acct-A")) - serverB := httptest.NewServer(respond("acct-B")) - t.Cleanup(serverA.Close) - t.Cleanup(serverB.Close) - - cfgA := &config.Config{Host: serverA.URL, Token: "x", Credentials: config.PatCredentials{}} - cfgB := &config.Config{Host: serverB.URL, Token: "x", Credentials: config.PatCredentials{}} - hostmetadata.Attach(cfgA) - hostmetadata.Attach(cfgB) - - require.NoError(t, cfgA.EnsureResolved()) - require.NoError(t, cfgB.EnsureResolved()) + require.NoError(t, cfg2.EnsureResolved()) - assert.Equal(t, "acct-A", cfgA.AccountID) - assert.Equal(t, "acct-B", cfgB.AccountID) + assert.Equal(t, "acct-1", cfg2.AccountID) + assert.Equal(t, int32(1), hits.Load(), "second EnsureResolved must not hit the server") } From ec2bd69898e672d26e98a3cf8db729a640445423 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 14:00:39 +0200 Subject: [PATCH 17/25] Probe positive cache first, skip caching transient errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flips the resolver so the happy path is one disk read: positive cache wraps the miss flow, which now probes negative and falls through to fetch only on true miss. Context cancellation and deadline errors are no longer written to the negative cache because they say nothing about the host's long-term availability. Regenerates cache/telemetry acceptance outputs — the synthetic negative-cache probe no longer runs on cache hits. 
Co-authored-by: Isaac --- acceptance/cache/clear/output.txt | 8 +-- acceptance/cache/simple/output.txt | 6 +- acceptance/telemetry/failure/output.txt | 6 +- .../telemetry/partial-success/output.txt | 6 +- acceptance/telemetry/skipped/output.txt | 6 +- acceptance/telemetry/success/output.txt | 6 +- acceptance/telemetry/timeout/output.txt | 6 +- libs/hostmetadata/resolver.go | 57 +++++++++++-------- libs/hostmetadata/resolver_test.go | 21 +++++++ 9 files changed, 73 insertions(+), 49 deletions(-) diff --git a/acceptance/cache/clear/output.txt b/acceptance/cache/clear/output.txt index dcd7496fd2..0469f3122f 100644 --- a/acceptance/cache/clear/output.txt +++ b/acceptance/cache/clear/output.txt @@ -3,10 +3,10 @@ [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing -[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing +[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) @@ -15,10 +15,6 @@ === Second call in a session is expected to be a cache hit [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) -[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing -[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached -[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache hit [DEBUG_TIMESTAMP] 
Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache hit @@ -30,10 +26,10 @@ Cache cleared successfully from [TEST_TMP_DIR]/.cache [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing -[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing +[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) diff --git a/acceptance/cache/simple/output.txt b/acceptance/cache/simple/output.txt index e9235ad409..2459ae8822 100644 --- a/acceptance/cache/simple/output.txt +++ b/acceptance/cache/simple/output.txt @@ -3,10 +3,10 @@ [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing -[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing +[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) @@ -15,10 +15,6 @@ === Second call in a session 
is expected to be a cache hit [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) -[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing -[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached -[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache hit [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache hit diff --git a/acceptance/telemetry/failure/output.txt b/acceptance/telemetry/failure/output.txt index ebc7d4fd27..1917aaf55d 100644 --- a/acceptance/telemetry/failure/output.txt +++ b/acceptance/telemetry/failure/output.txt @@ -2,12 +2,12 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS 
Debug: [Local Cache] cache miss, computing pid=PID +HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { diff --git a/acceptance/telemetry/partial-success/output.txt b/acceptance/telemetry/partial-success/output.txt index 7514fcd43f..f8317258eb 100644 --- a/acceptance/telemetry/partial-success/output.txt +++ b/acceptance/telemetry/partial-success/output.txt @@ -2,12 +2,12 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID +HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { diff --git a/acceptance/telemetry/skipped/output.txt b/acceptance/telemetry/skipped/output.txt index e4a5d7ed0e..a1a0ec1491 
100644 --- a/acceptance/telemetry/skipped/output.txt +++ b/acceptance/telemetry/skipped/output.txt @@ -2,12 +2,12 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID +HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { diff --git a/acceptance/telemetry/success/output.txt b/acceptance/telemetry/success/output.txt index fa17ff9d70..6e56e4384e 100644 --- a/acceptance/telemetry/success/output.txt +++ b/acceptance/telemetry/success/output.txt @@ -2,12 +2,12 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] 
failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID +HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { diff --git a/acceptance/telemetry/timeout/output.txt b/acceptance/telemetry/timeout/output.txt index 11aeef2ebd..96644b2e25 100644 --- a/acceptance/telemetry/timeout/output.txt +++ b/acceptance/telemetry/timeout/output.txt @@ -2,12 +2,12 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory 
pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID +HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { diff --git a/libs/hostmetadata/resolver.go b/libs/hostmetadata/resolver.go index e07cc8a17e..7aad8716e0 100644 --- a/libs/hostmetadata/resolver.go +++ b/libs/hostmetadata/resolver.go @@ -23,6 +23,11 @@ const ( // anything, since GetOrCompute only writes on success. var errNotCached = errors.New("not cached") +// errNegativeHit is returned from the positive-cache compute callback when the +// negative cache already has a sentinel for the host. It signals the outer +// resolver to return (nil, nil) without running fetch or writing to positive. +var errNegativeHit = errors.New("negative cache hit") + // hostFingerprint is the cache key for a given host. type hostFingerprint struct { Host string `json:"host"` @@ -34,10 +39,11 @@ type negativeSentinel struct { Message string `json:"message"` } -// NewResolver returns a HostMetadataResolver that consults the negative cache -// before hitting the positive cache, and records failed fetches so subsequent -// calls within negativeCacheTTL skip the network entirely. The fetch function -// is invoked on positive cache miss, typically cfg.DefaultHostMetadataResolver(). 
+// NewResolver returns a HostMetadataResolver backed by a positive and negative +// file cache. On positive hit it returns the cached metadata; on miss it +// probes the negative cache, then falls through to fetch and records failures +// so subsequent calls within negativeCacheTTL skip the network. The fetch +// function is invoked on miss, typically cfg.DefaultHostMetadataResolver(). func NewResolver(fetch config.HostMetadataResolver) config.HostMetadataResolver { // cache.NewCache uses ctx only for env lookups and cleanup-walk debug // logs; there is no cancellation signal to propagate. Using a background @@ -50,30 +56,35 @@ func NewResolver(fetch config.HostMetadataResolver) config.HostMetadataResolver return func(ctx context.Context, host string) (*config.HostMetadata, error) { fp := hostFingerprint{Host: host} - // Check negative cache first. errNotCached makes GetOrCompute skip the - // write, so this is a read-only probe. - sentinel, err := cache.GetOrCompute[*negativeSentinel](ctx, negative, fp, func(ctx context.Context) (*negativeSentinel, error) { - return nil, errNotCached - }) - if err == nil && sentinel != nil && sentinel.Error { - log.Debugf(ctx, "[hostmetadata] negative cache hit for %s: %s", host, sentinel.Message) - return nil, nil - } - - // Positive cache: on miss, delegate to the injected fetch function. + // Positive cache wraps the whole miss path so that the happy path (hit) + // is a single disk read — no synthetic probe, no negative-cache traffic. 
meta, err := cache.GetOrCompute[*config.HostMetadata](ctx, positive, fp, func(ctx context.Context) (*config.HostMetadata, error) { + sentinel, sErr := cache.GetOrCompute[*negativeSentinel](ctx, negative, fp, func(ctx context.Context) (*negativeSentinel, error) { + return nil, errNotCached + }) + if sErr == nil && sentinel != nil && sentinel.Error { + log.Debugf(ctx, "[hostmetadata] negative cache hit for %s: %s", host, sentinel.Message) + return nil, errNegativeHit + } return fetch(ctx, host) }) - if err != nil { - log.Debugf(ctx, "[hostmetadata] fetch failed for %s, recording negative: %v", host, err) - // Best-effort write to negative cache; ignore errors. - _, _ = cache.GetOrCompute[*negativeSentinel](ctx, negative, fp, func(ctx context.Context) (*negativeSentinel, error) { - return &negativeSentinel{Error: true, Message: err.Error()}, nil - }) + if err == nil { + return meta, nil + } + if errors.Is(err, errNegativeHit) { return nil, nil } - - return meta, nil + // Transient errors (cancellation, deadline) say nothing about the + // host's long-term availability — don't cache them. + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return nil, nil + } + log.Debugf(ctx, "[hostmetadata] fetch failed for %s, recording negative: %v", host, err) + // Best-effort write; ignore failures. 
+ _, _ = cache.GetOrCompute[*negativeSentinel](ctx, negative, fp, func(ctx context.Context) (*negativeSentinel, error) { + return &negativeSentinel{Error: true, Message: err.Error()}, nil + }) + return nil, nil } } diff --git a/libs/hostmetadata/resolver_test.go b/libs/hostmetadata/resolver_test.go index 6c25b7dc04..15022c0800 100644 --- a/libs/hostmetadata/resolver_test.go +++ b/libs/hostmetadata/resolver_test.go @@ -67,6 +67,27 @@ func TestNewResolver_FetchError_CachesNegative(t *testing.T) { assert.Equal(t, first, calls.Load(), "negative cache must skip the fetch") } +func TestNewResolver_CancellationNotCached(t *testing.T) { + t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) + + var calls atomic.Int32 + fetch := func(ctx context.Context, host string) (*config.HostMetadata, error) { + calls.Add(1) + return nil, context.Canceled + } + r := hostmetadata.NewResolver(fetch) + + m1, err := r(t.Context(), "https://example") + require.NoError(t, err) + assert.Nil(t, m1) + + m2, err := r(t.Context(), "https://example") + require.NoError(t, err) + assert.Nil(t, m2) + + assert.Equal(t, int32(2), calls.Load(), "cancellation must not be negatively cached") +} + func TestNewResolver_DifferentHosts_SeparateEntries(t *testing.T) { t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) From 8ce5516c817ee5a0bf4674b926f03b1af7361937 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 14:31:33 +0200 Subject: [PATCH 18/25] Match sanitized dev version in test replacements GetSanitizedVersion replaces + with - in build version metadata for filesystem safety, but the [DEV_VERSION] replacement regex only covered the + form. Cache paths use the sanitized form, so telemetry tests failed across machines with different git HEAD SHAs. Regex now accepts either + or - before the SHA suffix. 
Co-authored-by: Isaac --- acceptance/telemetry/failure/output.txt | 4 ++-- acceptance/telemetry/partial-success/output.txt | 4 ++-- acceptance/telemetry/skipped/output.txt | 4 ++-- acceptance/telemetry/success/output.txt | 4 ++-- acceptance/telemetry/timeout/output.txt | 4 ++-- libs/testdiff/replacement.go | 6 ++++-- 6 files changed, 14 insertions(+), 12 deletions(-) diff --git a/acceptance/telemetry/failure/output.txt b/acceptance/telemetry/failure/output.txt index 1917aaf55d..31a5bdbbe5 100644 --- a/acceptance/telemetry/failure/output.txt +++ b/acceptance/telemetry/failure/output.txt @@ -2,10 +2,10 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config diff --git a/acceptance/telemetry/partial-success/output.txt b/acceptance/telemetry/partial-success/output.txt 
index f8317258eb..b80379ce08 100644 --- a/acceptance/telemetry/partial-success/output.txt +++ b/acceptance/telemetry/partial-success/output.txt @@ -2,10 +2,10 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config diff --git a/acceptance/telemetry/skipped/output.txt b/acceptance/telemetry/skipped/output.txt index a1a0ec1491..a8650d25f3 100644 --- a/acceptance/telemetry/skipped/output.txt +++ b/acceptance/telemetry/skipped/output.txt @@ -2,10 +2,10 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat 
[TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config diff --git a/acceptance/telemetry/success/output.txt b/acceptance/telemetry/success/output.txt index 6e56e4384e..14ec4834eb 100644 --- a/acceptance/telemetry/success/output.txt +++ b/acceptance/telemetry/success/output.txt @@ -2,10 +2,10 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat 
cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config diff --git a/acceptance/telemetry/timeout/output.txt b/acceptance/telemetry/timeout/output.txt index 96644b2e25..6a80fd3bc8 100644 --- a/acceptance/telemetry/timeout/output.txt +++ b/acceptance/telemetry/timeout/output.txt @@ -2,10 +2,10 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]-0c8b5a07aebf/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID +HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID 
HH:MM:SS Debug: GET /.well-known/databricks-config diff --git a/libs/testdiff/replacement.go b/libs/testdiff/replacement.go index 188a623cc8..e293e74ede 100644 --- a/libs/testdiff/replacement.go +++ b/libs/testdiff/replacement.go @@ -25,8 +25,10 @@ var ( uuidRegex = regexp.MustCompile(`[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}`) numIdRegex = regexp.MustCompile(`[0-9]{3,}`) privatePathRegex = regexp.MustCompile(`(/tmp|/private)(/.*)/([a-zA-Z0-9]+)`) - // Version could v0.0.0-dev+21e1aacf518a or just v0.0.0-dev (the latter is currently the case on Windows) - devVersionRegex = regexp.MustCompile(`0\.0\.0-dev(\+[a-f0-9]{10,16})?`) + // Version could be v0.0.0-dev+21e1aacf518a, v0.0.0-dev-21e1aacf518a (the + // filesystem-sanitized form used in cache paths), or just v0.0.0-dev + // (currently the case on Windows). + devVersionRegex = regexp.MustCompile(`0\.0\.0-dev([-+][a-f0-9]{10,16})?`) // Matches databricks-sdk-go/0.90.0 sdkVersionRegex = regexp.MustCompile(`databricks-sdk-go/[0-9]+\.[0-9]+\.[0-9]+`) ) From 21fe664ebf51ff7cbabf7a2abce8348e03b315f4 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Apr 2026 15:38:17 +0200 Subject: [PATCH 19/25] Skip stat-not-found debug log on cache miss MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit os.Stat on a missing cache file returns an OS-specific error message (Unix: "no such file or directory"; Windows: "The system cannot find the file specified."), causing acceptance-test goldens to diverge between platforms. The error is also pure noise — the follow-up "cache miss, computing" line conveys the same information. Drop the log for fs.ErrNotExist; keep it for genuine stat failures (permissions, corruption). 
Co-authored-by: Isaac --- acceptance/cache/clear/output.txt | 6 ------ acceptance/cache/simple/output.txt | 3 --- acceptance/telemetry/failure/output.txt | 2 -- acceptance/telemetry/partial-success/output.txt | 2 -- acceptance/telemetry/skipped/output.txt | 2 -- acceptance/telemetry/success/output.txt | 2 -- acceptance/telemetry/timeout/output.txt | 2 -- libs/cache/file_cache.go | 10 +++++++++- 8 files changed, 9 insertions(+), 20 deletions(-) diff --git a/acceptance/cache/clear/output.txt b/acceptance/cache/clear/output.txt index 0469f3122f..1ccff0d23f 100644 --- a/acceptance/cache/clear/output.txt +++ b/acceptance/cache/clear/output.txt @@ -1,15 +1,12 @@ === First call in a session is expected to be a cache miss: [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result @@ -24,14 +21,11 @@ Cache cleared successfully from [TEST_TMP_DIR]/.cache === First call after a clear is expected to be a cache miss: [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: 
[Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result diff --git a/acceptance/cache/simple/output.txt b/acceptance/cache/simple/output.txt index 2459ae8822..16ab339d68 100644 --- a/acceptance/cache/simple/output.txt +++ b/acceptance/cache/simple/output.txt @@ -1,15 +1,12 @@ === First call in a session is expected to be a cache miss: [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] failed to stat cache file: (redacted) [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result diff --git a/acceptance/telemetry/failure/output.txt b/acceptance/telemetry/failure/output.txt index 31a5bdbbe5..96046fc400 100644 --- a/acceptance/telemetry/failure/output.txt +++ b/acceptance/telemetry/failure/output.txt @@ -2,10 +2,8 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, 
send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config diff --git a/acceptance/telemetry/partial-success/output.txt b/acceptance/telemetry/partial-success/output.txt index b80379ce08..30b0d92675 100644 --- a/acceptance/telemetry/partial-success/output.txt +++ b/acceptance/telemetry/partial-success/output.txt @@ -2,10 +2,8 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config diff --git a/acceptance/telemetry/skipped/output.txt 
b/acceptance/telemetry/skipped/output.txt index a8650d25f3..6d03b4d138 100644 --- a/acceptance/telemetry/skipped/output.txt +++ b/acceptance/telemetry/skipped/output.txt @@ -2,10 +2,8 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config diff --git a/acceptance/telemetry/success/output.txt b/acceptance/telemetry/success/output.txt index 14ec4834eb..87a494f571 100644 --- a/acceptance/telemetry/success/output.txt +++ b/acceptance/telemetry/success/output.txt @@ -2,10 +2,8 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata-negative/[SHA256_HASH].json: no such file or 
directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config diff --git a/acceptance/telemetry/timeout/output.txt b/acceptance/telemetry/timeout/output.txt index 6a80fd3bc8..dafadbf22e 100644 --- a/acceptance/telemetry/timeout/output.txt +++ b/acceptance/telemetry/timeout/output.txt @@ -2,10 +2,8 @@ >>> [CLI] selftest send-telemetry --debug HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] failed to stat cache file: stat [TEST_TMP_DIR]_PARENT/002/.cache/[DEV_VERSION]/host-metadata-negative/[SHA256_HASH].json: no such file or directory pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config diff --git a/libs/cache/file_cache.go b/libs/cache/file_cache.go index 44e9c5aba5..ee00f82590 100644 --- a/libs/cache/file_cache.go +++ b/libs/cache/file_cache.go @@ -2,7 +2,9 @@ package cache import ( "context" + "errors" "fmt" + "io/fs" "os" "path/filepath" "sync" @@ -217,7 +219,13 @@ func (fc *fileCache) readFromCacheJSON(ctx context.Context, cachePath string) ([ // Check file modification time for expiry info, err := os.Stat(cachePath) if err != nil { - log.Debugf(ctx, "[Local Cache] failed to stat cache file: %v", err) + // ErrNotExist is the common miss case; logging it adds noise and + // diverges across OSes (Unix: "no such file or directory"; + // 
Windows: "The system cannot find the file specified."). The + // follow-up "cache miss, computing" line already captures it. + if !errors.Is(err, fs.ErrNotExist) { + log.Debugf(ctx, "[Local Cache] failed to stat cache file: %v", err) + } return nil, false } From fae41c628216d851428cded6c272e5a44e86eee2 Mon Sep 17 00:00:00 2001 From: simon Date: Tue, 21 Apr 2026 08:10:45 +0200 Subject: [PATCH 20/25] Collapse per-site Attach wiring into a single factory registration SDK v0.128.0 (databricks/databricks-sdk-go#1636) adds config.DefaultHostMetadataResolverFactory so a package can install a single hook that every Config picks up on EnsureResolved, without per-site wiring. Replaces ten hostmetadata.Attach(cfg) call sites across seven files and the injection guardrail test with two pieces: - libs/hostmetadata/resolver.go: init() sets config.DefaultHostMetadataResolverFactory to wrap cfg.DefaultHostMetadataResolver() in the caching resolver. - main.go: blank import of libs/hostmetadata triggers that init() at startup so every *config.Config the CLI constructs picks up the cached lookup automatically. Co-authored-by: Isaac --- NEXT_CHANGELOG.md | 1 + bundle/config/workspace.go | 2 - cmd/api/api.go | 3 - cmd/auth/env.go | 2 - cmd/auth/profiles.go | 2 - cmd/labs/project/entrypoint.go | 7 +- cmd/root/auth.go | 13 +- libs/auth/arguments.go | 2 - libs/hostmetadata/injection_guardrail_test.go | 114 ------------------ libs/hostmetadata/resolver.go | 18 +-- libs/hostmetadata/resolver_test.go | 23 ++-- main.go | 4 + 12 files changed, 28 insertions(+), 163 deletions(-) delete mode 100644 libs/hostmetadata/injection_guardrail_test.go diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index ede82f7779..e997395619 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -8,6 +8,7 @@ * Added `--limit` flag to all paginated list commands for client-side result capping ([#4984](https://github.com/databricks/cli/pull/4984)). 
* Accept `yes` in addition to `y` for confirmation prompts, and show `[y/N]` to indicate that no is the default. +* Cache `/.well-known/databricks-config` lookups under `~/.cache/databricks/<version>/host-metadata/` so repeat CLI invocations against the same host skip the ~700ms discovery round trip. ### Bundles * Remove `experimental-jobs-as-code` template, superseded by `pydabs` ([#4999](https://github.com/databricks/cli/pull/4999)). diff --git a/bundle/config/workspace.go b/bundle/config/workspace.go index 4009c8f3d9..c699dc070b 100644 --- a/bundle/config/workspace.go +++ b/bundle/config/workspace.go @@ -6,7 +6,6 @@ import ( "github.com/databricks/cli/libs/auth" "github.com/databricks/cli/libs/databrickscfg" - "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/config" "github.com/databricks/databricks-sdk-go/marshal" @@ -164,7 +163,6 @@ func (w *Workspace) Client() (*databricks.WorkspaceClient, error) { w.NormalizeHostURL() cfg := w.Config() - hostmetadata.Attach(cfg) // If only the host is configured, we try and unambiguously match it to // a profile in the user's databrickscfg file. Override the default loaders. 
diff --git a/cmd/api/api.go b/cmd/api/api.go index 28f856c0ac..057c8f2246 100644 --- a/cmd/api/api.go +++ b/cmd/api/api.go @@ -8,7 +8,6 @@ import ( "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/flags" - "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/databricks-sdk-go/client" "github.com/databricks/databricks-sdk-go/config" "github.com/spf13/cobra" @@ -56,8 +55,6 @@ func makeCommand(method string) *cobra.Command { cfg.Profile = profileFlag.Value.String() } - hostmetadata.Attach(cfg) - api, err := client.New(cfg) if err != nil { return err diff --git a/cmd/auth/env.go b/cmd/auth/env.go index ddaca011db..11149af8c0 100644 --- a/cmd/auth/env.go +++ b/cmd/auth/env.go @@ -11,7 +11,6 @@ import ( "strings" "github.com/databricks/cli/libs/databrickscfg/profile" - "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/databricks-sdk-go/config" "github.com/spf13/cobra" "gopkg.in/ini.v1" @@ -108,7 +107,6 @@ func newEnvCommand() *cobra.Command { Host: host, Profile: profile, } - hostmetadata.Attach(cfg) if profile != "" { cfg.Profile = profile } else if cfg.Host == "" { diff --git a/cmd/auth/profiles.go b/cmd/auth/profiles.go index f6d5114824..51c397a9ea 100644 --- a/cmd/auth/profiles.go +++ b/cmd/auth/profiles.go @@ -13,7 +13,6 @@ import ( "github.com/databricks/cli/libs/databrickscfg" "github.com/databricks/cli/libs/databrickscfg/profile" "github.com/databricks/cli/libs/env" - "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/cli/libs/log" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/config" @@ -43,7 +42,6 @@ func (c *profileMetadata) Load(ctx context.Context, configFilePath string, skipV Profile: c.Name, DatabricksCliPath: env.Get(ctx, "DATABRICKS_CLI_PATH"), } - hostmetadata.Attach(cfg) _ = cfg.EnsureResolved() if cfg.IsAws() { c.Cloud = "aws" diff --git a/cmd/labs/project/entrypoint.go b/cmd/labs/project/entrypoint.go 
index 685c916df7..335f7c1301 100644 --- a/cmd/labs/project/entrypoint.go +++ b/cmd/labs/project/entrypoint.go @@ -14,7 +14,6 @@ import ( "github.com/databricks/cli/internal/build" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/env" - "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/cli/libs/log" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/config" @@ -147,16 +146,14 @@ func (e *Entrypoint) envAwareConfig(ctx context.Context) (*config.Config, error) if err != nil { return nil, err } - cfg := &config.Config{ + return &config.Config{ ConfigFile: filepath.Join(home, ".databrickscfg"), Loaders: []config.Loader{ env.NewConfigLoader(ctx), config.ConfigAttributes, config.ConfigFile, }, - } - hostmetadata.Attach(cfg) - return cfg, nil + }, nil } func (e *Entrypoint) envAwareConfigWithProfile(ctx context.Context, profile string) (*config.Config, error) { diff --git a/cmd/root/auth.go b/cmd/root/auth.go index 8011e8f64f..4a4bd9ab87 100644 --- a/cmd/root/auth.go +++ b/cmd/root/auth.go @@ -12,7 +12,6 @@ import ( "github.com/databricks/cli/libs/databrickscfg" "github.com/databricks/cli/libs/databrickscfg/profile" envlib "github.com/databricks/cli/libs/env" - "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" "github.com/databricks/databricks-sdk-go" @@ -111,9 +110,7 @@ func accountClientOrPrompt(ctx context.Context, cfg *config.Config, allowPrompt if err != nil { return nil, err } - promptCfg := &databricks.Config{Profile: profile} - hostmetadata.Attach((*config.Config)(promptCfg)) - a, err = databricks.NewAccountClient(promptCfg) + a, err = databricks.NewAccountClient(&databricks.Config{Profile: profile}) if err == nil { err = a.Config.Authenticate(emptyHttpRequest(ctx)) if err != nil { @@ -159,8 +156,6 @@ func MustAccountClient(cmd *cobra.Command, args []string) error { ctx = cmdctx.SetConfigUsed(ctx, cfg) 
cmd.SetContext(ctx) - hostmetadata.Attach(cfg) - profiler := profile.GetProfiler(ctx) resolveDefaultProfile(ctx, cfg) @@ -234,9 +229,7 @@ func workspaceClientOrPrompt(ctx context.Context, cfg *config.Config, allowPromp if err != nil { return nil, err } - promptCfg := &databricks.Config{Profile: profile} - hostmetadata.Attach((*config.Config)(promptCfg)) - w, err = databricks.NewWorkspaceClient(promptCfg) + w, err = databricks.NewWorkspaceClient(&databricks.Config{Profile: profile}) if err == nil { err = w.Config.Authenticate(emptyHttpRequest(ctx)) if err != nil { @@ -269,8 +262,6 @@ func MustWorkspaceClient(cmd *cobra.Command, args []string) error { ctx = cmdctx.SetConfigUsed(cmd.Context(), cfg) cmd.SetContext(ctx) - hostmetadata.Attach(cfg) - // Try to load a bundle configuration if we're allowed to by the caller (see `./auth_options.go`). if !shouldSkipLoadBundle(cmd.Context()) { b := TryConfigureBundle(cmd) diff --git a/libs/auth/arguments.go b/libs/auth/arguments.go index 32f19884d6..4f724cc801 100644 --- a/libs/auth/arguments.go +++ b/libs/auth/arguments.go @@ -3,7 +3,6 @@ package auth import ( "strings" - "github.com/databricks/cli/libs/hostmetadata" "github.com/databricks/databricks-sdk-go/config" "github.com/databricks/databricks-sdk-go/credentials/u2m" ) @@ -49,7 +48,6 @@ func (a AuthArguments) ToOAuthArgument() (u2m.OAuthArgument, error) { // based on the explicit fields provided. 
Loaders: []config.Loader{config.ConfigAttributes}, } - hostmetadata.Attach(cfg) if a.DiscoveryURL != "" { cfg.DiscoveryURL = a.DiscoveryURL diff --git a/libs/hostmetadata/injection_guardrail_test.go b/libs/hostmetadata/injection_guardrail_test.go deleted file mode 100644 index b610247d17..0000000000 --- a/libs/hostmetadata/injection_guardrail_test.go +++ /dev/null @@ -1,114 +0,0 @@ -package hostmetadata_test - -import ( - "io/fs" - "os" - "path/filepath" - "regexp" - "strings" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -// allowlist maps repo-relative paths (forward slashes) to a short reason the -// site doesn't need hostmetadata.Attach. When you add a new entry, write a -// specific reason — "no resolution" is too vague; say "SaveToProfile: write-only". -var allowlist = map[string]string{ - "cmd/auth/auth.go": "CanonicalHostName only (URL munging)", - "cmd/auth/resolve.go": "CanonicalHostName only", - "cmd/auth/logout.go": "CanonicalHostName only", - "cmd/auth/token.go": "SaveToProfile: write-only", - "cmd/configure/configure.go": "SaveToProfile: write-only", - "libs/databrickscfg/profile/profile.go": "CanonicalHostName only", - "libs/databrickscfg/profile/profiler.go": "CanonicalHostName only", - "libs/testproxy/server.go": "test helper, no real auth", - "acceptance/internal/prepare_server.go": "acceptance test infrastructure", - "libs/env/loader.go": "doc comment only, no struct construction", - // Task 6 deliberately skipped these two sites: - // cmd/auth/login.go:setHostAndAccountId (used for HostType() pattern matching only) - // cmd/root/auth.go:~290 (cfg reassigned from already-resolved client) - // Both are in files that ALSO contain Attach calls, so they don't appear - // in this allowlist — the file-level "has Attach" check covers them. -} - -// constructionPattern matches both `config.Config{` and `databricks.Config{` -// struct literals — the two forms we construct in this repo. 
-var constructionPattern = regexp.MustCompile(`\b(?:config|databricks)\.Config\{`) - -func TestConfigConstructionSitesHaveAttach(t *testing.T) { - repoRoot := findRepoRoot(t) - - var offenders []string - err := filepath.WalkDir(repoRoot, func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - if d.IsDir() { - // Skip: .git, vendor, .claude (worktrees), acceptance test output dirs. - name := d.Name() - if name == ".git" || name == "vendor" || name == ".claude" || name == "node_modules" { - return fs.SkipDir - } - return nil - } - if !strings.HasSuffix(path, ".go") { - return nil - } - // Skip test files — we only want production code. - if strings.HasSuffix(path, "_test.go") { - return nil - } - - rel, err := filepath.Rel(repoRoot, path) - if err != nil { - return err - } - relSlash := filepath.ToSlash(rel) - - // Allowlist check: if the file is explicitly allowlisted, skip. - if _, ok := allowlist[relSlash]; ok { - return nil - } - - src, err := os.ReadFile(path) - if err != nil { - return err - } - content := string(src) - - if !constructionPattern.MatchString(content) { - return nil - } - if strings.Contains(content, "hostmetadata.Attach(") { - return nil - } - - offenders = append(offenders, relSlash) - return nil - }) - require.NoError(t, err) - - assert.Empty(t, offenders, - "the following files construct *config.Config but do not call hostmetadata.Attach. "+ - "Either add `hostmetadata.Attach(cfg)` before the first resolve, "+ - "or add the file to the allowlist in %s with a specific reason.", - "libs/hostmetadata/injection_guardrail_test.go") -} - -// findRepoRoot walks up from the test's working directory until it finds go.mod. 
-func findRepoRoot(t *testing.T) string { - dir, err := os.Getwd() - require.NoError(t, err) - for { - if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { - return dir - } - parent := filepath.Dir(dir) - if parent == dir { - t.Fatal("could not find go.mod walking up from " + dir) - } - dir = parent - } -} diff --git a/libs/hostmetadata/resolver.go b/libs/hostmetadata/resolver.go index 7aad8716e0..e4f7806885 100644 --- a/libs/hostmetadata/resolver.go +++ b/libs/hostmetadata/resolver.go @@ -1,5 +1,9 @@ // Package hostmetadata provides a cached implementation of the SDK's // HostMetadataResolver, backed by the CLI's shared file cache. +// +// Importing this package (typically via a blank import from main) installs +// [config.DefaultHostMetadataResolverFactory] so every *config.Config the +// CLI constructs automatically gets the cached resolver on first EnsureResolved. package hostmetadata import ( @@ -39,6 +43,12 @@ type negativeSentinel struct { Message string `json:"message"` } +func init() { + config.DefaultHostMetadataResolverFactory = func(cfg *config.Config) config.HostMetadataResolver { + return NewResolver(cfg.DefaultHostMetadataResolver()) + } +} + // NewResolver returns a HostMetadataResolver backed by a positive and negative // file cache. On positive hit it returns the cached metadata; on miss it // probes the negative cache, then falls through to fetch and records failures @@ -48,7 +58,7 @@ func NewResolver(fetch config.HostMetadataResolver) config.HostMetadataResolver // cache.NewCache uses ctx only for env lookups and cleanup-walk debug // logs; there is no cancellation signal to propagate. Using a background // context keeps NewResolver callable from sites without a caller ctx - // in scope (e.g. bundle.Workspace.Client). + // in scope (e.g. the factory invoked from Config.EnsureResolved). ctx := context.Background() //nolint:gocritic // no caller ctx and cache.NewCache does not use ctx for cancellation. 
positive := cache.NewCache(ctx, positiveCacheComponent, positiveCacheTTL, nil) negative := cache.NewCache(ctx, negativeCacheComponent, negativeCacheTTL, nil) @@ -87,9 +97,3 @@ func NewResolver(fetch config.HostMetadataResolver) config.HostMetadataResolver return nil, nil } } - -// Attach installs a caching HostMetadataResolver on cfg, using the SDK's -// default HTTP resolver as the fetch function on cache miss. -func Attach(cfg *config.Config) { - cfg.HostMetadataResolver = NewResolver(cfg.DefaultHostMetadataResolver()) -} diff --git a/libs/hostmetadata/resolver_test.go b/libs/hostmetadata/resolver_test.go index 15022c0800..eb68f8b30a 100644 --- a/libs/hostmetadata/resolver_test.go +++ b/libs/hostmetadata/resolver_test.go @@ -14,14 +14,9 @@ import ( "github.com/stretchr/testify/require" ) -func TestAttach_SetsResolverOnConfig(t *testing.T) { - t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) - cfg := &config.Config{Host: "https://example.cloud.databricks.com"} - require.Nil(t, cfg.HostMetadataResolver) - - hostmetadata.Attach(cfg) - - assert.NotNil(t, cfg.HostMetadataResolver) +func TestInit_RegistersDefaultHostMetadataResolverFactory(t *testing.T) { + require.NotNil(t, config.DefaultHostMetadataResolverFactory, + "importing hostmetadata must register a factory so every *config.Config picks up the cached resolver") } func TestNewResolver_CacheHit_SkipsFetch(t *testing.T) { @@ -105,11 +100,11 @@ func TestNewResolver_DifferentHosts_SeparateEntries(t *testing.T) { assert.Equal(t, "acct-for-https://b", mB.AccountID) } -// TestAttach_EndToEnd_CacheHitSkipsSDKFetch is an integration sanity check that -// the default fetch wiring through cfg.DefaultHostMetadataResolver() works: -// two independent *config.Config instances sharing DATABRICKS_CACHE_DIR must -// hit the well-known endpoint once, not twice. 
-func TestAttach_EndToEnd_CacheHitSkipsSDKFetch(t *testing.T) { +// TestFactory_EndToEnd_CacheHitSkipsSDKFetch is an integration sanity check +// that importing hostmetadata installs a factory which back-fills every +// *config.Config with a cached resolver. Two independent configs sharing +// DATABRICKS_CACHE_DIR must hit the well-known endpoint once, not twice. +func TestFactory_EndToEnd_CacheHitSkipsSDKFetch(t *testing.T) { t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) var hits atomic.Int32 @@ -124,12 +119,10 @@ func TestAttach_EndToEnd_CacheHitSkipsSDKFetch(t *testing.T) { t.Cleanup(server.Close) cfg1 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} - hostmetadata.Attach(cfg1) require.NoError(t, cfg1.EnsureResolved()) require.Equal(t, int32(1), hits.Load()) cfg2 := &config.Config{Host: server.URL, Token: "x", Credentials: config.PatCredentials{}} - hostmetadata.Attach(cfg2) require.NoError(t, cfg2.EnsureResolved()) assert.Equal(t, "acct-1", cfg2.AccountID) diff --git a/main.go b/main.go index c568e6adbd..e81dde6946 100644 --- a/main.go +++ b/main.go @@ -6,6 +6,10 @@ import ( "github.com/databricks/cli/cmd" "github.com/databricks/cli/cmd/root" + + // Registers a disk-cached HostMetadataResolver factory on the SDK so every + // *config.Config the CLI constructs reuses the cached /.well-known lookup. + _ "github.com/databricks/cli/libs/hostmetadata" ) func main() { From 9f32fc8e791ed9cbd4c86d4aedeab2e771f1dfad Mon Sep 17 00:00:00 2001 From: simon Date: Tue, 21 Apr 2026 08:59:18 +0200 Subject: [PATCH 21/25] Drop redundant factory-registration test TestFactory_EndToEnd_CacheHitSkipsSDKFetch already covers the same case: if the init() factory weren't installed, the second EnsureResolved would hit the server (2 fetches, not 1) and that test would fail. 
Co-authored-by: Isaac --- libs/hostmetadata/resolver_test.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/libs/hostmetadata/resolver_test.go b/libs/hostmetadata/resolver_test.go index eb68f8b30a..8db0f8b0c1 100644 --- a/libs/hostmetadata/resolver_test.go +++ b/libs/hostmetadata/resolver_test.go @@ -14,11 +14,6 @@ import ( "github.com/stretchr/testify/require" ) -func TestInit_RegistersDefaultHostMetadataResolverFactory(t *testing.T) { - require.NotNil(t, config.DefaultHostMetadataResolverFactory, - "importing hostmetadata must register a factory so every *config.Config picks up the cached resolver") -} - func TestNewResolver_CacheHit_SkipsFetch(t *testing.T) { t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) From 1ec8a58eb98380026d3910b8f8f96bcd489a1ed5 Mon Sep 17 00:00:00 2001 From: simon Date: Tue, 21 Apr 2026 09:10:29 +0200 Subject: [PATCH 22/25] Address codex review: cleaner negative-cache probe and no raw error text on disk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues in the host-metadata resolver flagged by codex: 1. The negative-cache probe used GetOrCompute with a sentinel errNotCached value in the compute callback. That tripped the cache's "error while computing" debug log and local.cache.error telemetry metric on every positive-cache miss — even though the miss itself is not an error. Adds cache.Get[T] as a read-only lookup that never computes or writes, and uses it for the negative probe. Positive writes still go through GetOrCompute so concurrent resolves are still serialized by the cache mutex. 2. The negative sentinel persisted raw err.Error() to disk under Message, which was only read back into a debug log. Network errors can contain proxy URLs, internal hostnames, and other environment-sensitive text. Drop the Message field; only the existence of the sentinel matters. Regenerates acceptance outputs that captured the now-gone "error while computing: not cached" debug line. 
Co-authored-by: Isaac --- acceptance/cache/clear/output.txt | 6 ----- acceptance/cache/simple/output.txt | 3 --- acceptance/telemetry/failure/output.txt | 3 --- .../telemetry/partial-success/output.txt | 3 --- acceptance/telemetry/skipped/output.txt | 3 --- acceptance/telemetry/success/output.txt | 3 --- acceptance/telemetry/timeout/output.txt | 3 --- libs/cache/cache.go | 23 +++++++++++++++++++ libs/cache/file_cache.go | 14 +++++++++++ libs/cache/noop_file_cache.go | 4 ++++ libs/hostmetadata/resolver.go | 18 +++++---------- 11 files changed, 47 insertions(+), 36 deletions(-) diff --git a/acceptance/cache/clear/output.txt b/acceptance/cache/clear/output.txt index 1ccff0d23f..2d46b4c9f2 100644 --- a/acceptance/cache/clear/output.txt +++ b/acceptance/cache/clear/output.txt @@ -2,9 +2,6 @@ === First call in a session is expected to be a cache miss: [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing -[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing -[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing @@ -22,9 +19,6 @@ Cache cleared successfully from [TEST_TMP_DIR]/.cache === First call after a clear is expected to be a cache miss: [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing -[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing -[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] 
[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing diff --git a/acceptance/cache/simple/output.txt b/acceptance/cache/simple/output.txt index 16ab339d68..2206ffdbc7 100644 --- a/acceptance/cache/simple/output.txt +++ b/acceptance/cache/simple/output.txt @@ -2,9 +2,6 @@ === First call in a session is expected to be a cache miss: [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing -[DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] -[DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing -[DEBUG_TIMESTAMP] Debug: [Local Cache] error while computing: not cached [DEBUG_TIMESTAMP] Debug: [Local Cache] computed and stored result [DEBUG_TIMESTAMP] Debug: [Local Cache] using cache key: [SHA256_HASH] [DEBUG_TIMESTAMP] Debug: [Local Cache] cache miss, computing diff --git a/acceptance/telemetry/failure/output.txt b/acceptance/telemetry/failure/output.txt index 96046fc400..2086a88444 100644 --- a/acceptance/telemetry/failure/output.txt +++ b/acceptance/telemetry/failure/output.txt @@ -3,9 +3,6 @@ HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { diff --git a/acceptance/telemetry/partial-success/output.txt b/acceptance/telemetry/partial-success/output.txt index 30b0d92675..c641e6bc0d 100644 --- a/acceptance/telemetry/partial-success/output.txt +++ b/acceptance/telemetry/partial-success/output.txt @@ -3,9 +3,6 @@ HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS 
Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { diff --git a/acceptance/telemetry/skipped/output.txt b/acceptance/telemetry/skipped/output.txt index 6d03b4d138..9e784a8eb0 100644 --- a/acceptance/telemetry/skipped/output.txt +++ b/acceptance/telemetry/skipped/output.txt @@ -3,9 +3,6 @@ HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { diff --git a/acceptance/telemetry/success/output.txt b/acceptance/telemetry/success/output.txt index 87a494f571..96f72c9727 100644 --- a/acceptance/telemetry/success/output.txt +++ b/acceptance/telemetry/success/output.txt @@ -3,9 +3,6 @@ HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { diff --git a/acceptance/telemetry/timeout/output.txt b/acceptance/telemetry/timeout/output.txt index 
dafadbf22e..21f79ef7be 100644 --- a/acceptance/telemetry/timeout/output.txt +++ b/acceptance/telemetry/timeout/output.txt @@ -3,9 +3,6 @@ HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] using cache key: [SHA256_HASH] pid=PID -HH:MM:SS Debug: [Local Cache] cache miss, computing pid=PID -HH:MM:SS Debug: [Local Cache] error while computing: not cached pid=PID HH:MM:SS Debug: GET /.well-known/databricks-config < HTTP/1.1 200 OK < { diff --git a/libs/cache/cache.go b/libs/cache/cache.go index 513f7ebd00..23bb7b29c4 100644 --- a/libs/cache/cache.go +++ b/libs/cache/cache.go @@ -13,6 +13,10 @@ type cacheImpl interface { // The compute function must return JSON-encoded data as []byte. // The returned []byte is also expected to be JSON-encoded. getOrComputeJSON(ctx context.Context, fingerprint any, compute func(ctx context.Context) ([]byte, error)) ([]byte, error) + + // getJSON returns cached JSON bytes for fingerprint, or (nil, false) on + // miss or when caching is disabled. Never computes, never writes. + getJSON(ctx context.Context, fingerprint any) ([]byte, bool) } // Cache provides a concrete cache that works with any type through the generic GetOrCompute function. @@ -21,6 +25,25 @@ type Cache struct { impl cacheImpl } +// Get returns the cached value for the given fingerprint, or (zero, false) on +// miss. Unlike GetOrCompute it never invokes compute and never writes. Use +// this when the caller wants a read-only probe and will handle a miss +// explicitly, without the cache-level "error while computing" log that an +// erroring compute callback would emit. 
+func Get[T any](ctx context.Context, c *Cache, fingerprint any) (T, bool) { + var zero T + data, ok := c.impl.getJSON(ctx, fingerprint) + if !ok { + return zero, false + } + var result T + if err := json.Unmarshal(data, &result); err != nil { + log.Debugf(ctx, "[Local Cache] failed to unmarshal cached data: %v", err) + return zero, false + } + return result, true +} + // GetOrCompute retrieves cached content for the given fingerprint, or computes it using the provided function. // If the content is found in cache, it is returned directly. // If not found, the compute function is called, its result is cached, and then returned. diff --git a/libs/cache/file_cache.go b/libs/cache/file_cache.go index ee00f82590..8c5b79bc03 100644 --- a/libs/cache/file_cache.go +++ b/libs/cache/file_cache.go @@ -145,6 +145,20 @@ func NewCache(ctx context.Context, component string, expiry time.Duration, metri return &Cache{impl: fc} } +func (fc *fileCache) getJSON(ctx context.Context, fingerprint any) ([]byte, bool) { + if !fc.cacheEnabled { + return nil, false + } + cacheKey, err := fingerprintToHash(fingerprint) + if err != nil { + log.Debugf(ctx, "[Local Cache] failed to generate cache key: %v", err) + return nil, false + } + fc.mu.Lock() + defer fc.mu.Unlock() + return fc.readFromCacheJSON(ctx, fc.getCachePath(cacheKey)) +} + func (fc *fileCache) addTelemetryMetric(key string) { if fc.metrics != nil { fc.metrics.SetBoolValue(key, true) diff --git a/libs/cache/noop_file_cache.go b/libs/cache/noop_file_cache.go index 4b71be43fc..a39e63f915 100644 --- a/libs/cache/noop_file_cache.go +++ b/libs/cache/noop_file_cache.go @@ -7,3 +7,7 @@ type noopFileCache struct{} func (c *noopFileCache) getOrComputeJSON(ctx context.Context, fingerprint any, compute func(ctx context.Context) ([]byte, error)) ([]byte, error) { return compute(ctx) } + +func (c *noopFileCache) getJSON(ctx context.Context, fingerprint any) ([]byte, bool) { + return nil, false +} diff --git a/libs/hostmetadata/resolver.go 
b/libs/hostmetadata/resolver.go index e4f7806885..d09f38b5a9 100644 --- a/libs/hostmetadata/resolver.go +++ b/libs/hostmetadata/resolver.go @@ -23,10 +23,6 @@ const ( negativeCacheTTL = 60 * time.Second ) -// errNotCached forces a cache miss in the negative-cache probe without storing -// anything, since GetOrCompute only writes on success. -var errNotCached = errors.New("not cached") - // errNegativeHit is returned from the positive-cache compute callback when the // negative cache already has a sentinel for the host. It signals the outer // resolver to return (nil, nil) without running fetch or writing to positive. @@ -38,9 +34,10 @@ type hostFingerprint struct { } // negativeSentinel records a failed host-metadata fetch in the negative cache. +// Only the presence of the entry matters; no details about the original error +// are persisted to disk. type negativeSentinel struct { - Error bool `json:"error"` - Message string `json:"message"` + Error bool `json:"error"` } func init() { @@ -69,11 +66,8 @@ func NewResolver(fetch config.HostMetadataResolver) config.HostMetadataResolver // Positive cache wraps the whole miss path so that the happy path (hit) // is a single disk read — no synthetic probe, no negative-cache traffic. 
meta, err := cache.GetOrCompute[*config.HostMetadata](ctx, positive, fp, func(ctx context.Context) (*config.HostMetadata, error) { - sentinel, sErr := cache.GetOrCompute[*negativeSentinel](ctx, negative, fp, func(ctx context.Context) (*negativeSentinel, error) { - return nil, errNotCached - }) - if sErr == nil && sentinel != nil && sentinel.Error { - log.Debugf(ctx, "[hostmetadata] negative cache hit for %s: %s", host, sentinel.Message) + if sentinel, ok := cache.Get[*negativeSentinel](ctx, negative, fp); ok && sentinel != nil && sentinel.Error { + log.Debugf(ctx, "[hostmetadata] negative cache hit for %s", host) return nil, errNegativeHit } return fetch(ctx, host) @@ -92,7 +86,7 @@ func NewResolver(fetch config.HostMetadataResolver) config.HostMetadataResolver log.Debugf(ctx, "[hostmetadata] fetch failed for %s, recording negative: %v", host, err) // Best-effort write; ignore failures. _, _ = cache.GetOrCompute[*negativeSentinel](ctx, negative, fp, func(ctx context.Context) (*negativeSentinel, error) { - return &negativeSentinel{Error: true, Message: err.Error()}, nil + return &negativeSentinel{Error: true}, nil }) return nil, nil } From 29d7389c83ead8fb3c757cf4e81d2de40b9a6dfb Mon Sep 17 00:00:00 2001 From: simon Date: Tue, 21 Apr 2026 10:15:31 +0200 Subject: [PATCH 23/25] Trim comment noise and a tautological test - Drop TestNewResolver_DifferentHosts_SeparateEntries: the assertion that two hosts get separate cache entries just restates the fingerprint's Host-keyed design. - Collapse the four-line context.Background() comment into the existing //nolint line; drop the one-line hostFingerprint comment that restated the struct; shorten the negativeSentinel comment. 
Co-authored-by: Isaac --- libs/hostmetadata/resolver.go | 12 +++--------- libs/hostmetadata/resolver_test.go | 17 ----------------- 2 files changed, 3 insertions(+), 26 deletions(-) diff --git a/libs/hostmetadata/resolver.go b/libs/hostmetadata/resolver.go index d09f38b5a9..d2a91fca21 100644 --- a/libs/hostmetadata/resolver.go +++ b/libs/hostmetadata/resolver.go @@ -28,14 +28,12 @@ const ( // resolver to return (nil, nil) without running fetch or writing to positive. var errNegativeHit = errors.New("negative cache hit") -// hostFingerprint is the cache key for a given host. type hostFingerprint struct { Host string `json:"host"` } -// negativeSentinel records a failed host-metadata fetch in the negative cache. -// Only the presence of the entry matters; no details about the original error -// are persisted to disk. +// negativeSentinel marks a host whose last fetch failed. Only presence matters; +// the original error text is deliberately not persisted to disk. type negativeSentinel struct { Error bool `json:"error"` } @@ -52,11 +50,7 @@ func init() { // so subsequent calls within negativeCacheTTL skip the network. The fetch // function is invoked on miss, typically cfg.DefaultHostMetadataResolver(). func NewResolver(fetch config.HostMetadataResolver) config.HostMetadataResolver { - // cache.NewCache uses ctx only for env lookups and cleanup-walk debug - // logs; there is no cancellation signal to propagate. Using a background - // context keeps NewResolver callable from sites without a caller ctx - // in scope (e.g. the factory invoked from Config.EnsureResolved). - ctx := context.Background() //nolint:gocritic // no caller ctx and cache.NewCache does not use ctx for cancellation. + ctx := context.Background() //nolint:gocritic // no caller ctx; cache.NewCache uses ctx only for env lookups and cleanup-walk logs. 
positive := cache.NewCache(ctx, positiveCacheComponent, positiveCacheTTL, nil) negative := cache.NewCache(ctx, negativeCacheComponent, negativeCacheTTL, nil) diff --git a/libs/hostmetadata/resolver_test.go b/libs/hostmetadata/resolver_test.go index 8db0f8b0c1..965f38cd7e 100644 --- a/libs/hostmetadata/resolver_test.go +++ b/libs/hostmetadata/resolver_test.go @@ -78,23 +78,6 @@ func TestNewResolver_CancellationNotCached(t *testing.T) { assert.Equal(t, int32(2), calls.Load(), "cancellation must not be negatively cached") } -func TestNewResolver_DifferentHosts_SeparateEntries(t *testing.T) { - t.Setenv("DATABRICKS_CACHE_DIR", t.TempDir()) - - fetch := func(ctx context.Context, host string) (*config.HostMetadata, error) { - return &config.HostMetadata{AccountID: "acct-for-" + host}, nil - } - r := hostmetadata.NewResolver(fetch) - - mA, err := r(t.Context(), "https://a") - require.NoError(t, err) - mB, err := r(t.Context(), "https://b") - require.NoError(t, err) - - assert.Equal(t, "acct-for-https://a", mA.AccountID) - assert.Equal(t, "acct-for-https://b", mB.AccountID) -} - // TestFactory_EndToEnd_CacheHitSkipsSDKFetch is an integration sanity check // that importing hostmetadata installs a factory which back-fills every // *config.Config with a cached resolver. Two independent configs sharing From 9525f47ec2154333834ea7576133755f819b4d76 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 22 Apr 2026 13:44:06 +0200 Subject: [PATCH 24/25] Address Denis's review: Warnf on fetch fail, cache.Put, wording - Bump fetch-failure log from Debugf to Warnf (restores visibility that the SDK used to emit; capped at one per host per 60s by the negative cache). - Add cache.Put[T] primitive and use it for the negative sentinel write instead of the GetOrCompute workaround. - Reword "don't cache them" comment to spell out that we skip the negative-sentinel write for transient errors. 
- Expand the //nolint note explaining why NewResolver is stuck with context.Background (SDK factory signature has no ctx). - Drop superfluous `|| true` from the host-metadata-cache script; the commands succeed. Co-authored-by: Isaac --- acceptance/auth/host-metadata-cache/script | 4 +-- libs/cache/cache.go | 19 +++++++++++ libs/cache/file_cache.go | 14 ++++++++ libs/cache/file_cache_test.go | 39 ++++++++++++++++++++++ libs/cache/noop_file_cache.go | 3 ++ libs/hostmetadata/resolver.go | 16 +++++---- 6 files changed, 86 insertions(+), 9 deletions(-) diff --git a/acceptance/auth/host-metadata-cache/script b/acceptance/auth/host-metadata-cache/script index 2f88c987e4..f7a5f2fe0f 100644 --- a/acceptance/auth/host-metadata-cache/script +++ b/acceptance/auth/host-metadata-cache/script @@ -10,10 +10,10 @@ token = test-token EOF title "First invocation populates the cache\n" -$CLI auth profiles --skip-validate --output json || true +$CLI auth profiles --skip-validate --output json title "Second invocation should read from the cache\n" -$CLI auth profiles --skip-validate --output json || true +$CLI auth profiles --skip-validate --output json title "Only one /.well-known/databricks-config request recorded\n" print_requests.py //.well-known/databricks-config --get --sort diff --git a/libs/cache/cache.go b/libs/cache/cache.go index 23bb7b29c4..3fca87ed94 100644 --- a/libs/cache/cache.go +++ b/libs/cache/cache.go @@ -17,6 +17,11 @@ type cacheImpl interface { // getJSON returns cached JSON bytes for fingerprint, or (nil, false) on // miss or when caching is disabled. Never computes, never writes. getJSON(ctx context.Context, fingerprint any) ([]byte, bool) + + // putJSON writes data to the cache under fingerprint, overwriting any + // existing entry. When caching is disabled it is a no-op. Failures are + // silent (logged at debug). 
+ putJSON(ctx context.Context, fingerprint any, data []byte) } // Cache provides a concrete cache that works with any type through the generic GetOrCompute function. @@ -79,3 +84,17 @@ func GetOrCompute[T any](ctx context.Context, c *Cache, fingerprint any, compute return result, nil } + +// Put serializes value to JSON and writes it to the cache under fingerprint, +// overwriting any existing entry. Failures are silent; when caching is +// disabled it is a no-op. Use this when the caller wants an unconditional +// write (e.g. recording a negative sentinel) rather than the read-then-write +// semantics of GetOrCompute. +func Put[T any](ctx context.Context, c *Cache, fingerprint any, value T) { + data, err := json.Marshal(value) + if err != nil { + log.Debugf(ctx, "[Local Cache] failed to marshal value for cache write: %v", err) + return + } + c.impl.putJSON(ctx, fingerprint, data) +} diff --git a/libs/cache/file_cache.go b/libs/cache/file_cache.go index 8c5b79bc03..3d03fa3d16 100644 --- a/libs/cache/file_cache.go +++ b/libs/cache/file_cache.go @@ -145,6 +145,20 @@ func NewCache(ctx context.Context, component string, expiry time.Duration, metri return &Cache{impl: fc} } +func (fc *fileCache) putJSON(ctx context.Context, fingerprint any, data []byte) { + if !fc.cacheEnabled { + return + } + cacheKey, err := fingerprintToHash(fingerprint) + if err != nil { + log.Debugf(ctx, "[Local Cache] failed to generate cache key for put: %v", err) + return + } + fc.mu.Lock() + defer fc.mu.Unlock() + fc.writeToCacheJSON(ctx, fc.getCachePath(cacheKey), data) +} + func (fc *fileCache) getJSON(ctx context.Context, fingerprint any) ([]byte, bool) { if !fc.cacheEnabled { return nil, false diff --git a/libs/cache/file_cache_test.go b/libs/cache/file_cache_test.go index 3a8470c59d..8a47f41c20 100644 --- a/libs/cache/file_cache_test.go +++ b/libs/cache/file_cache_test.go @@ -122,6 +122,45 @@ func TestFileCacheGetOrCompute(t *testing.T) { assert.Equal(t, int32(1), 
atomic.LoadInt32(&computeCalls)) } +func TestFileCachePut(t *testing.T) { + ctx := t.Context() + cacheDir := t.TempDir() + ctx = env.Set(ctx, "DATABRICKS_CACHE_ENABLED", "true") + ctx = env.Set(ctx, "DATABRICKS_CACHE_DIR", cacheDir) + + cache := NewCache(ctx, "test-component", 60*time.Minute, nil) + fingerprint := struct { + Key string `json:"key"` + }{Key: "put-test"} + + Put(ctx, cache, fingerprint, "first") + got, ok := Get[string](ctx, cache, fingerprint) + require.True(t, ok) + assert.Equal(t, "first", got) + + // Put overwrites, unlike GetOrCompute which preserves existing entries. + Put(ctx, cache, fingerprint, "second") + got, ok = Get[string](ctx, cache, fingerprint) + require.True(t, ok) + assert.Equal(t, "second", got) +} + +func TestFileCachePutDisabled(t *testing.T) { + ctx := t.Context() + cacheDir := t.TempDir() + ctx = env.Set(ctx, "DATABRICKS_CACHE_ENABLED", "false") + ctx = env.Set(ctx, "DATABRICKS_CACHE_DIR", cacheDir) + + cache := NewCache(ctx, "test-component", 60*time.Minute, nil) + fingerprint := struct { + Key string `json:"key"` + }{Key: "put-disabled"} + + Put(ctx, cache, fingerprint, "value") + _, ok := Get[string](ctx, cache, fingerprint) + assert.False(t, ok, "disabled cache must not persist Put writes") +} + func TestFileCacheGetOrComputeError(t *testing.T) { ctx := t.Context() tempDir := t.TempDir() diff --git a/libs/cache/noop_file_cache.go b/libs/cache/noop_file_cache.go index a39e63f915..3d79e8d887 100644 --- a/libs/cache/noop_file_cache.go +++ b/libs/cache/noop_file_cache.go @@ -11,3 +11,6 @@ func (c *noopFileCache) getOrComputeJSON(ctx context.Context, fingerprint any, c func (c *noopFileCache) getJSON(ctx context.Context, fingerprint any) ([]byte, bool) { return nil, false } + +func (c *noopFileCache) putJSON(ctx context.Context, fingerprint any, data []byte) { +} diff --git a/libs/hostmetadata/resolver.go b/libs/hostmetadata/resolver.go index d2a91fca21..7a47bc7a70 100644 --- a/libs/hostmetadata/resolver.go +++ 
b/libs/hostmetadata/resolver.go @@ -50,7 +50,12 @@ func init() { // so subsequent calls within negativeCacheTTL skip the network. The fetch // function is invoked on miss, typically cfg.DefaultHostMetadataResolver(). func NewResolver(fetch config.HostMetadataResolver) config.HostMetadataResolver { - ctx := context.Background() //nolint:gocritic // no caller ctx; cache.NewCache uses ctx only for env lookups and cleanup-walk logs. + // The SDK factory signature (func(cfg *config.Config) HostMetadataResolver) + // gives us no caller ctx at construction, so Background is the only option + // here. cache.NewCache uses ctx only for a one-time env lookup and + // cleanup-walk logging; per-call ctx still flows through the returned + // resolver below. + ctx := context.Background() //nolint:gocritic // forced by SDK factory signature; see comment above. positive := cache.NewCache(ctx, positiveCacheComponent, positiveCacheTTL, nil) negative := cache.NewCache(ctx, negativeCacheComponent, negativeCacheTTL, nil) @@ -73,15 +78,12 @@ func NewResolver(fetch config.HostMetadataResolver) config.HostMetadataResolver return nil, nil } // Transient errors (cancellation, deadline) say nothing about the - // host's long-term availability — don't cache them. + // host's long-term availability — don't write a negative sentinel. if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { return nil, nil } - log.Debugf(ctx, "[hostmetadata] fetch failed for %s, recording negative: %v", host, err) - // Best-effort write; ignore failures. 
- _, _ = cache.GetOrCompute[*negativeSentinel](ctx, negative, fp, func(ctx context.Context) (*negativeSentinel, error) { - return &negativeSentinel{Error: true}, nil - }) + log.Warnf(ctx, "[hostmetadata] fetch failed for %s, recording negative: %v", host, err) + cache.Put(ctx, negative, fp, &negativeSentinel{Error: true}) return nil, nil } } From b6110bae1624b0378efa5ad6ea06e4d3b1b338bb Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 22 Apr 2026 13:57:28 +0200 Subject: [PATCH 25/25] Stabilize Warn text so acceptance goldens aren't env-dependent The raw error from the host-metadata fetch varies by environment (DNS error format differs on macOS vs Linux, and some hosts return HTML or TLS failures), which broke the cmd/auth/profiles and auth/bundle_and_profile acceptance tests on CI. Split the log output: Warn keeps only the host and the retry window (stable across platforms), and the raw error drops to Debug for the -v crowd. Regenerated the two affected golden outputs and dropped a couple of now-dead test.toml replacements that only existed to redact the old error text. Co-authored-by: Isaac --- acceptance/auth/bundle_and_profile/output.txt | 2 ++ acceptance/auth/bundle_and_profile/test.toml | 4 ---- acceptance/cmd/auth/profiles/output.txt | 1 + acceptance/cmd/auth/profiles/test.toml | 6 ------ libs/hostmetadata/resolver.go | 6 +++++- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/acceptance/auth/bundle_and_profile/output.txt b/acceptance/auth/bundle_and_profile/output.txt index 5501ab5a6e..b2bab9342a 100644 --- a/acceptance/auth/bundle_and_profile/output.txt +++ b/acceptance/auth/bundle_and_profile/output.txt @@ -13,6 +13,7 @@ === Inside the bundle, profile flag not matching bundle host. Should use profile from the flag and not the bundle. 
>>> errcode [CLI] current-user me -p profile_name +Warn: [hostmetadata] failed to fetch host metadata for https://non.existing.subdomain.databricks.com, will skip for 1m0s Error: Get "https://non.existing.subdomain.databricks.com/api/2.0/preview/scim/v2/Me": (redacted) Exit code: 1 @@ -72,6 +73,7 @@ Validation OK! === Bundle commands load bundle configuration with -t and -p flag, validation not OK (profile host don't match bundle host) >>> errcode [CLI] bundle validate -t prod -p DEFAULT +Warn: [hostmetadata] failed to fetch host metadata for https://bar.com, will skip for 1m0s Error: cannot resolve bundle auth configuration: the host in the profile ([DATABRICKS_TARGET]) doesn’t match the host configured in the bundle (https://bar.com) Name: test-auth diff --git a/acceptance/auth/bundle_and_profile/test.toml b/acceptance/auth/bundle_and_profile/test.toml index 477e83a18d..92458e9d30 100644 --- a/acceptance/auth/bundle_and_profile/test.toml +++ b/acceptance/auth/bundle_and_profile/test.toml @@ -9,10 +9,6 @@ New='DATABRICKS_TARGET' Old='DATABRICKS_URL' New='DATABRICKS_TARGET' -[[Repls]] -Old='Warn: Failed to resolve host metadata: .*\. Falling back to user config\.' -New='Warn: Failed to resolve host metadata: (redacted). Falling back to user config.' 
- [[Repls]] Old='Get "https://non.existing.subdomain.databricks.com/api/2.0/preview/scim/v2/Me": .*' New='Get "https://non.existing.subdomain.databricks.com/api/2.0/preview/scim/v2/Me": (redacted)' diff --git a/acceptance/cmd/auth/profiles/output.txt b/acceptance/cmd/auth/profiles/output.txt index a04bb2dfaa..207e2d5471 100644 --- a/acceptance/cmd/auth/profiles/output.txt +++ b/acceptance/cmd/auth/profiles/output.txt @@ -1,5 +1,6 @@ === Profiles with workspace_id (JSON output) +Warn: [hostmetadata] failed to fetch host metadata for https://test.cloud.databricks.com, will skip for 1m0s { "profiles": [ { diff --git a/acceptance/cmd/auth/profiles/test.toml b/acceptance/cmd/auth/profiles/test.toml index ad8ec1f872..36c0e7e237 100644 --- a/acceptance/cmd/auth/profiles/test.toml +++ b/acceptance/cmd/auth/profiles/test.toml @@ -1,9 +1,3 @@ Ignore = [ "home" ] - -# Normalize platform-specific DNS error messages in host metadata warnings. -# Linux includes resolver address (e.g. "on 127.0.0.53:53"), macOS does not. -[[Repls]] -Old = 'dial tcp: lookup (\S+)( on \S+)?: no such host' -New = 'dial tcp: lookup $1: no such host' diff --git a/libs/hostmetadata/resolver.go b/libs/hostmetadata/resolver.go index 7a47bc7a70..595e37bd22 100644 --- a/libs/hostmetadata/resolver.go +++ b/libs/hostmetadata/resolver.go @@ -82,7 +82,11 @@ func NewResolver(fetch config.HostMetadataResolver) config.HostMetadataResolver if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { return nil, nil } - log.Warnf(ctx, "[hostmetadata] fetch failed for %s, recording negative: %v", host, err) + // The raw error is env-dependent (DNS vs TLS vs HTTP) and would make + // acceptance goldens brittle, so keep it at Debug; the Warn text is + // stable (host only) for user visibility. 
+ log.Warnf(ctx, "[hostmetadata] failed to fetch host metadata for %s, will skip for %s", host, negativeCacheTTL) + log.Debugf(ctx, "[hostmetadata] fetch error for %s: %v", host, err) cache.Put(ctx, negative, fp, &negativeSentinel{Error: true}) return nil, nil }