From 0e40f2d8acfa3308cd37cd1d430eaa89589f04f2 Mon Sep 17 00:00:00 2001 From: Jan Rose Date: Fri, 5 Jun 2026 15:49:17 +0200 Subject: [PATCH 1/2] Add index_subtype backend_default for vector search indexes --- .../recreate/with_endpoint/output.txt | 6 ++++++ bundle/direct/dresources/resources.yml | 3 +++ libs/testserver/vector_search_indexes.go | 7 +++++++ 3 files changed, 16 insertions(+) diff --git a/acceptance/bundle/resources/vector_search_indexes/recreate/with_endpoint/output.txt b/acceptance/bundle/resources/vector_search_indexes/recreate/with_endpoint/output.txt index 7838a374446..20c139225ed 100644 --- a/acceptance/bundle/resources/vector_search_indexes/recreate/with_endpoint/output.txt +++ b/acceptance/bundle/resources/vector_search_indexes/recreate/with_endpoint/output.txt @@ -73,6 +73,7 @@ Plan: 1 to add, 0 to change, 1 to delete, 1 unchanged }, "endpoint_name": "vs-endpoint-[UNIQUE_NAME]", "endpoint_uuid": "[UUID]", + "index_subtype": "HYBRID", "index_type": "DIRECT_ACCESS", "name": "main.default.vs_index_[UNIQUE_NAME]", "primary_key": "id", @@ -86,6 +87,11 @@ Plan: 1 to add, 0 to change, 1 to delete, 1 unchanged "reason": "state-only field", "old": "[UUID]", "remote": "[UUID]" + }, + "index_subtype": { + "action": "skip", + "reason": "backend_default", + "remote": "HYBRID" } } } diff --git a/bundle/direct/dresources/resources.yml b/bundle/direct/dresources/resources.yml index c283e6f0b68..75a60e0dc62 100644 --- a/bundle/direct/dresources/resources.yml +++ b/bundle/direct/dresources/resources.yml @@ -608,3 +608,6 @@ resources: reason: immutable - field: direct_access_index_spec reason: immutable + backend_defaults: + # The Vector Search API assigns index_subtype when the config omits it + - field: index_subtype diff --git a/libs/testserver/vector_search_indexes.go b/libs/testserver/vector_search_indexes.go index 9c99772fc30..15105682cf7 100644 --- a/libs/testserver/vector_search_indexes.go +++ b/libs/testserver/vector_search_indexes.go @@ -64,11 +64,18 @@ func (s *FakeWorkspace) VectorSearchIndexCreate(req Request) Response { } } + // The backend assigns index_subtype when the request omits it (HYBRID by default) + indexSubtype := createReq.IndexSubtype + if indexSubtype == "" { + indexSubtype = vectorsearch.IndexSubtypeHybrid + } + index := fakeVectorSearchIndex{ VectorIndex: vectorsearch.VectorIndex{ Creator: s.CurrentUser().UserName, EndpointName: createReq.EndpointName, IndexType: createReq.IndexType, + IndexSubtype: indexSubtype, Name: createReq.Name, PrimaryKey: createReq.PrimaryKey, DeltaSyncIndexSpec: remapDeltaSyncSpec(createReq.DeltaSyncIndexSpec), From 3b9aface0a9ec78400d5826b9279e44cc49afcb4 Mon Sep 17 00:00:00 2001 From: Jan Rose Date: Fri, 5 Jun 2026 15:50:18 +0200 Subject: [PATCH 2/2] Add --unique to print_requests --- acceptance/bin/print_requests.py | 35 +++++++++++++++++-- .../recreate/embedding_dimension/output.txt | 4 +-- .../recreate/embedding_dimension/script | 4 ++- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/acceptance/bin/print_requests.py b/acceptance/bin/print_requests.py index bd4ab1eb381..acf63874432 100755 --- a/acceptance/bin/print_requests.py +++ b/acceptance/bin/print_requests.py @@ -56,6 +56,25 @@ >>> # Test positive + negative filters (AND logic) >>> test(test_requests, ["//api", "^/jobs"], False, False) R4 POST + +>>> # --unique collapses consecutive duplicate requests (like uniq), e.g. repeated GET polls +>>> dup_requests = [ +... {"method": "POST", "path": "/api/2.0/idx", "body": {"n": 1}}, +... {"method": "GET", "path": "/api/2.0/idx"}, +... {"method": "GET", "path": "/api/2.0/idx"}, +... ] +>>> [x["method"] for x in filter_requests(dup_requests, ["//idx"], True, False, unique=True)] +['POST', 'GET'] + +>>> # Only consecutive duplicates collapse; a repeat after another request is kept +>>> seq = [ +... {"method": "GET", "path": "/api/2.0/idx"}, +... {"method": "DELETE", "path": "/api/2.0/idx"}, +... {"method": "GET", "path": "/api/2.0/idx"}, +... {"method": "GET", "path": "/api/2.0/idx"}, +... ] +>>> [x["method"] for x in filter_requests(seq, ["//idx"], True, False, unique=True)] +['GET', 'DELETE', 'GET'] """ import os @@ -104,7 +123,7 @@ def read_json_many(s): assert result == [{"method": "GET"}, {"method": "POST"}], result -def filter_requests(requests, path_filters, include_get, should_sort): +def filter_requests(requests, path_filters, include_get, should_sort, unique=False): """Filter requests based on method and path filters.""" positive_filters = [] negative_filters = [] @@ -145,6 +164,13 @@ def filter_requests(requests, path_filters, include_get, should_sort): if should_sort: filtered_requests.sort(key=str) + if unique: + deduped = [] + for req in filtered_requests: + if not deduped or deduped[-1] != req: + deduped.append(req) + filtered_requests = deduped + return filtered_requests @@ -155,6 +181,11 @@ def main(): parser.add_argument("--get", action="store_true", help="Include GET requests (excluded by default)") parser.add_argument("--keep", action="store_true", help="Keep out.requests.json file after processing") parser.add_argument("--sort", action="store_true", help="Sort requests before output") + parser.add_argument( + "--unique", + action="store_true", + help="Collapse consecutive duplicate requests (like uniq), e.g. repeated GET polls", + ) parser.add_argument("--oneline", action="store_true", help="Print output with one request per line") parser.add_argument("--fname", default="out.requests.txt") args = parser.parse_args() @@ -175,7 +206,7 @@ def main(): return requests = read_json_many(data) - filtered_requests = filter_requests(requests, args.path_filters, args.get, args.sort) + filtered_requests = filter_requests(requests, args.path_filters, args.get, args.sort, args.unique) if args.verbose: print( f"Read {len(data)} chars, {len(requests)} requests, {len(filtered_requests)} after filtering", diff --git a/acceptance/bundle/resources/vector_search_indexes/recreate/embedding_dimension/output.txt b/acceptance/bundle/resources/vector_search_indexes/recreate/embedding_dimension/output.txt index 0c04db43f89..ebc663c33e7 100644 --- a/acceptance/bundle/resources/vector_search_indexes/recreate/embedding_dimension/output.txt +++ b/acceptance/bundle/resources/vector_search_indexes/recreate/embedding_dimension/output.txt @@ -6,7 +6,7 @@ Deploying resources... Updating deployment state... Deployment complete! ->>> print_requests.py --get //vector-search/indexes +>>> print_requests.py --get --unique //vector-search/indexes === Change embedding_dimension (should trigger recreation) >>> update_file.py databricks.yml embedding_dimension: 768 embedding_dimension: 384 @@ -27,7 +27,7 @@ Deploying resources... Updating deployment state... Deployment complete! ->>> print_requests.py --get //vector-search/indexes +>>> print_requests.py --get --unique //vector-search/indexes >>> [CLI] vector-search-indexes get-index main.default.vs_index_[UNIQUE_NAME] { diff --git a/acceptance/bundle/resources/vector_search_indexes/recreate/embedding_dimension/script b/acceptance/bundle/resources/vector_search_indexes/recreate/embedding_dimension/script index 6c994cd4586..c56fb89841f 100644 --- a/acceptance/bundle/resources/vector_search_indexes/recreate/embedding_dimension/script +++ b/acceptance/bundle/resources/vector_search_indexes/recreate/embedding_dimension/script @@ -12,7 +12,9 @@ print_requests() { # without that, removing the wait would silently still pass against the # testserver (which finishes deletion synchronously) — the GET in the log # is the assertion that the wait actually fired. - trace print_requests.py --get '//vector-search/indexes' > out.requests.${name}.$DATABRICKS_BUNDLE_ENGINE.json + # --unique collapses the repeated identical poll GETs (one on the testserver, + # many on a real workspace that provisions asynchronously) to a stable count. + trace print_requests.py --get --unique '//vector-search/indexes' > out.requests.${name}.$DATABRICKS_BUNDLE_ENGINE.json rm -f out.requests.txt }