From feb5315c3a08e73eb02d66969e6236bbc94ed636 Mon Sep 17 00:00:00 2001 From: Sanjay Dutt Date: Thu, 23 Apr 2026 15:17:40 +0530 Subject: [PATCH 1/4] Add derived stored retrieval for DenseVectorField to avoid duplicate vector storage --- .../component/RealTimeGetComponent.java | 18 +-- .../apache/solr/schema/DenseVectorField.java | 28 +++- .../solr/search/SolrDocumentFetcher.java | 141 +++++++++++++++++- .../apache/solr/search/SolrReturnFields.java | 5 + ...densevector-derived-stored-multivalued.xml | 22 +++ .../schema-densevector-derived-stored.xml | 54 +++++++ .../solr/schema/DenseVectorFieldTest.java | 94 ++++++++++++ .../pages/dense-vector-search.adoc | 19 +++ 8 files changed, 365 insertions(+), 16 deletions(-) create mode 100644 solr/core/src/test-files/solr/collection1/conf/bad-schema-densevector-derived-stored-multivalued.xml create mode 100644 solr/core/src/test-files/solr/collection1/conf/schema-densevector-derived-stored.xml diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java index d1dc15d9a496..63664621c6ca 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java @@ -241,7 +241,6 @@ public void process(ResponseBuilder rb) throws IOException { boolean opennedRealtimeSearcher = false; BytesRefBuilder idBytes = new BytesRefBuilder(); - DocValuesIteratorCache reuseDvIters = null; for (String idStr : reqIds.allIds) { fieldType.readableToIndexed(idStr, idBytes); // if _route_ is passed, id is a child doc. 
TODO remove in SOLR-15064 @@ -349,14 +348,10 @@ public void process(ResponseBuilder rb) throws IOException { if (docid < 0) continue; SolrDocumentFetcher docFetcher = searcherInfo.getSearcher().getDocFetcher(); - Document luceneDocument = - docFetcher.doc(docid, rsp.getReturnFields().getLuceneFieldNames()); - SolrDocument doc = toSolrDoc(luceneDocument, core.getLatestSchema()); - if (reuseDvIters == null) { - reuseDvIters = new DocValuesIteratorCache(searcherInfo.getSearcher()); - } - docFetcher.decorateDocValueFields( - doc, docid, docFetcher.getNonStoredDVs(true), reuseDvIters); + SolrReturnFields solrReturnFields = (SolrReturnFields) rsp.getReturnFields(); + solrReturnFields.resetFetchOptimizer(); + SolrDocument doc = docFetcher.solrDoc(docid, solrReturnFields); + removeCopyFieldTargets(doc, req.getSchema()); if (null != transformer) { if (null == resultContext) { // either first pass, or we've re-opened searcher - either way now we setContext @@ -618,7 +613,10 @@ private static SolrDocument mergePartialDocWithFullDocFromIndex( private static SolrDocument fetchSolrDoc( SolrIndexSearcher searcher, int docId, ReturnFields returnFields) throws IOException { final SolrDocumentFetcher docFetcher = searcher.getDocFetcher(); - final SolrDocument solrDoc = docFetcher.solrDoc(docId, (SolrReturnFields) returnFields); + final SolrReturnFields solrReturnFields = (SolrReturnFields) returnFields; + solrReturnFields.resetFetchOptimizer(); + final SolrDocument solrDoc = docFetcher.solrDoc(docId, solrReturnFields); + removeCopyFieldTargets(solrDoc, searcher.getSchema()); final DocTransformer transformer = returnFields.getTransformer(); if (transformer != null) { transformer.setContext( diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index f29714d5b1ee..503f82815d70 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ 
b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -78,6 +78,7 @@ public class DenseVectorField extends FloatPointField { static final String HNSW_M = "hnswM"; static final String HNSW_EF_CONSTRUCTION = "hnswEfConstruction"; static final String VECTOR_ENCODING = "vectorEncoding"; + static final String USE_VECTOR_VALUES_AS_STORED = "useVectorValuesAsStored"; static final VectorEncoding DEFAULT_VECTOR_ENCODING = VectorEncoding.FLOAT32; static final String KNN_SIMILARITY_FUNCTION = "similarityFunction"; static final VectorSimilarityFunction DEFAULT_SIMILARITY = VectorSimilarityFunction.EUCLIDEAN; @@ -117,6 +118,8 @@ public class DenseVectorField extends FloatPointField { */ private VectorEncoding vectorEncoding; + private boolean useVectorValuesAsStored; + private int cuvsWriterThreads; private int cuvsIntGraphDegree; private int cuvsGraphDegree; @@ -187,6 +190,10 @@ public void init(IndexSchema schema, Map args) { .orElse(DEFAULT_VECTOR_ENCODING); args.remove(VECTOR_ENCODING); + this.useVectorValuesAsStored = + ofNullable(args.get(USE_VECTOR_VALUES_AS_STORED)).map(Boolean::parseBoolean).orElse(false); + args.remove(USE_VECTOR_VALUES_AS_STORED); + this.hnswM = ofNullable(args.get(HNSW_M)) .map(Integer::parseInt) @@ -309,6 +316,10 @@ public int getCuvsHnswEfConstruction() { return cuvsHnswEfConstruction; } + public boolean useVectorValuesAsStored() { + return useVectorValuesAsStored; + } + @Override protected boolean enableDocValuesByDefault() { return false; @@ -324,6 +335,21 @@ public void checkSchemaField(final SchemaField field) throws SolrException { getClass().getSimpleName() + " fields can not have docValues: " + field.getName()); } + if (useVectorValuesAsStored) { + if (!field.stored()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + USE_VECTOR_VALUES_AS_STORED + " requires stored=true for field " + field.getName()); + } + if (field.multiValued()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + 
USE_VECTOR_VALUES_AS_STORED + + " is not supported for multiValued DenseVectorField: " + + field.getName()); + } + } + switch (vectorEncoding) { case FLOAT32: if (dimension > KnnVectorsFormat.DEFAULT_MAX_DIMENSIONS) { @@ -360,7 +386,7 @@ public List createFields(SchemaField field, Object value) { if (field.indexed()) { fields.add(createField(field, vectorBuilder)); } - if (field.stored()) { + if (field.stored() && !useVectorValuesAsStored) { switch (vectorEncoding) { case FLOAT32: fields.ensureCapacity(vectorBuilder.getFloatVector().length + 1); diff --git a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java index 3366d5241ef5..02f87cf1cc9c 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java @@ -43,12 +43,15 @@ import org.apache.lucene.document.StoredValue; import org.apache.lucene.document.TextField; import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; @@ -68,6 +71,7 @@ import org.apache.solr.response.DocsStreamer; import org.apache.solr.response.ResultContext; import org.apache.solr.schema.BoolField; +import org.apache.solr.schema.DenseVectorField; import org.apache.solr.schema.EnumFieldType; import org.apache.solr.schema.LatLonPointSpatialField; import org.apache.solr.schema.NumberType; @@ -95,6 +99,7 @@ public class 
SolrDocumentFetcher { private final Set allStored; private final Set dvsCanSubstituteStored; + private final Set derivedStoredVectorFields; /** Contains the names/patterns of all docValues=true,stored=false fields in the schema. */ private final Set allNonStoredDVs; @@ -133,6 +138,7 @@ private SolrDocumentFetcher(SolrDocumentFetcher template, StoredFields storedFie this.largeFields = template.largeFields; this.dvsCanSubstituteStored = template.dvsCanSubstituteStored; this.allStored = template.allStored; + this.derivedStoredVectorFields = template.derivedStoredVectorFields; this.storedHighlightFieldNames = template.indexedFieldNames; this.indexedFieldNames = template.indexedFieldNames; this.storedFields = storedFields; @@ -169,6 +175,15 @@ protected SolrDocumentFetcher clone() { final Set storedLargeFields = new HashSet<>(); final Set dvsCanSubstituteStored = new HashSet<>(); final Set allStoreds = new HashSet<>(); + final Set derivedStoredVectors = new HashSet<>(); + + for (SchemaField schemaField : searcher.getSchema().getFields().values()) { + if (schemaField.getType() instanceof DenseVectorField vectorField + && schemaField.stored() + && vectorField.useVectorValuesAsStored()) { + derivedStoredVectors.add(schemaField.getName()); + } + } // can find materialized dynamic fields, unlike using the Solr IndexSchema. 
for (FieldInfo fieldInfo : searcher.getFieldInfos()) { @@ -182,6 +197,11 @@ protected SolrDocumentFetcher clone() { if (schemaField.stored()) { allStoreds.add(fieldInfo.name); } + if (schemaField.getType() instanceof DenseVectorField vectorField + && schemaField.stored() + && vectorField.useVectorValuesAsStored()) { + derivedStoredVectors.add(fieldInfo.name); + } if (!schemaField.stored() && schemaField.hasDocValues()) { if (schemaField.useDocValuesAsStored()) { nonStoredDVsUsedAsStored.add(fieldInfo.name); @@ -203,6 +223,7 @@ protected SolrDocumentFetcher clone() { this.largeFields = Collections.unmodifiableSet(storedLargeFields); this.dvsCanSubstituteStored = Collections.unmodifiableSet(dvsCanSubstituteStored); this.allStored = Collections.unmodifiableSet(allStoreds); + this.derivedStoredVectorFields = Collections.unmodifiableSet(derivedStoredVectors); this.storedFields = null; // template docFetcher should throw NPE if used directly this.storedHighlightFieldNames = new Collection[1]; this.indexedFieldNames = new Collection[1]; @@ -768,7 +789,7 @@ public Set getNonStoredDVsWithoutCopyTargets() { } /** - * Moved as a private class here, we consider it an impelmentation detail. It should not be + * Moved as a private class here, we consider it an implementation detail. It should not be * exposed outside of this class. * *

This class is in charge of insuring that SolrDocuments can have their fields populated @@ -780,6 +801,7 @@ class RetrieveFieldsOptimizer { private final Set storedFields; // always non null private final Set dvFields; + private final Set vectorFields; private final SolrReturnFields solrReturnFields; @@ -788,6 +810,7 @@ class RetrieveFieldsOptimizer { RetrieveFieldsOptimizer(SolrReturnFields solrReturnFields) { this.storedFields = calcStoredFieldsForReturn(solrReturnFields); this.dvFields = calcDocValueFieldsForReturn(solrReturnFields); + this.vectorFields = calcDerivedVectorFieldsForReturn(solrReturnFields); this.solrReturnFields = solrReturnFields; if (storedFields != null && dvsCanSubstituteStored.containsAll(storedFields)) { @@ -830,11 +853,27 @@ private Set calcStoredFieldsForReturn(ReturnFields returnFields) { if (returnFields.wantsAllFields()) { return null; } else if (returnFields.hasPatternMatching()) { - for (String s : getAllStored()) { - if (returnFields.wantsField(s)) { - storedFields.add(s); - } + if (fnames == null) { + return null; } + storedFields.addAll(fnames); + storedFields.removeIf( + (String name) -> { + SchemaField schemaField = searcher.getSchema().getFieldOrNull(name); + if (schemaField == null) { + // Get it from the stored fields if, for some reason, we can't get the schema. + return false; + } + if (schemaField.stored() && schemaField.multiValued()) { + // must return multivalued fields from stored data if possible. + return false; + } + if (schemaField.stored() == false) { + // if it's not stored, no choice but to return from DV. 
+ return true; + } + return false; + }); } else if (fnames != null) { storedFields.addAll(fnames); storedFields.removeIf( @@ -893,6 +932,96 @@ private Set calcDocValueFieldsForReturn(ReturnFields returnFields) { return result; } + private Set calcDerivedVectorFieldsForReturn(ReturnFields returnFields) { + if (derivedStoredVectorFields.isEmpty()) { + return Set.of(); + } + + final Set result = new HashSet<>(); + if (returnFields.wantsAllFields()) { + result.addAll(derivedStoredVectorFields); + } else if (returnFields.hasPatternMatching()) { + for (String field : derivedStoredVectorFields) { + if (returnFields.wantsField(field)) { + result.add(field); + } + } + } else { + Set fnames = returnFields.getLuceneFieldNames(); + if (fnames != null) { + result.addAll(fnames); + result.retainAll(derivedStoredVectorFields); + } else { + for (String field : derivedStoredVectorFields) { + if (returnFields.wantsField(field)) { + result.add(field); + } + } + } + } + return result; + } + + private void decorateDerivedVectorFields(SolrDocument sdoc, int luceneDocId) + throws IOException { + if (vectorFields.isEmpty()) { + return; + } + + final List leafContexts = searcher.getLeafContexts(); + final int subIndex = ReaderUtil.subIndex(luceneDocId, leafContexts); + final LeafReaderContext leafReaderContext = leafContexts.get(subIndex); + final LeafReader leafReader = leafReaderContext.reader(); + final int localId = luceneDocId - leafReaderContext.docBase; + + for (String field : vectorFields) { + if (sdoc.containsKey(field)) { + continue; + } + + SchemaField schemaField = searcher.getSchema().getFieldOrNull(field); + if (schemaField == null + || !(schemaField.getType() instanceof DenseVectorField vectorField)) { + continue; + } + + switch (vectorField.getVectorEncoding()) { + case FLOAT32: + FloatVectorValues floatVectorValues = leafReader.getFloatVectorValues(field); + if (floatVectorValues == null) { + continue; + } + KnnVectorValues.DocIndexIterator floatIterator = 
floatVectorValues.iterator(); + if (floatIterator.advance(localId) != localId) { + continue; + } + float[] floatVector = floatVectorValues.vectorValue(floatIterator.index()); + List floatValues = new ArrayList<>(floatVector.length); + for (float value : floatVector) { + floatValues.add(value); + } + sdoc.setField(field, floatValues); + break; + case BYTE: + ByteVectorValues byteVectorValues = leafReader.getByteVectorValues(field); + if (byteVectorValues == null) { + continue; + } + KnnVectorValues.DocIndexIterator byteIterator = byteVectorValues.iterator(); + if (byteIterator.advance(localId) != localId) { + continue; + } + byte[] byteVector = byteVectorValues.vectorValue(byteIterator.index()); + List byteValues = new ArrayList<>(byteVector.length); + for (byte value : byteVector) { + byteValues.add((int) value); + } + sdoc.setField(field, byteValues); + break; + } + } + } + private SolrDocument getSolrDoc(int luceneDocId) { SolrDocument sdoc = null; @@ -903,6 +1032,7 @@ private SolrDocument getSolrDoc(int luceneDocId) { sdoc = DocsStreamer.convertLuceneDocToSolrDoc(doc, searcher.getSchema(), getReturnFields()); if (returnDVFields() == false) { + decorateDerivedVectorFields(sdoc, luceneDocId); solrReturnFields.setFieldSources(SolrReturnFields.FIELD_SOURCES.ALL_FROM_STORED); return sdoc; } else { @@ -918,6 +1048,7 @@ private SolrDocument getSolrDoc(int luceneDocId) { if (returnDVFields()) { decorateDocValueFields(sdoc, luceneDocId, getDvFields(), reuseDvIters); } + decorateDerivedVectorFields(sdoc, luceneDocId); } catch (IOException e) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, diff --git a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java index bf850f4de8b5..1a63e5f2a846 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java +++ b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java @@ -161,6 +161,11 @@ public 
RetrieveFieldsOptimizer getFetchOptimizer(Supplier + + + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-densevector-derived-stored.xml b/solr/core/src/test-files/solr/collection1/conf/schema-densevector-derived-stored.xml new file mode 100644 index 000000000000..c90a4a58effb --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-densevector-derived-stored.xml @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + diff --git a/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java b/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java index f84eaf40ef0b..b6ab4a8ae248 100644 --- a/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java +++ b/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java @@ -93,6 +93,14 @@ public void fieldDefinition_docValues_shouldThrowException() throws Exception { "DenseVectorField fields can not have docValues: vector"); } + @Test + public void fieldDefinition_derivedStoredMultiValued_shouldThrowException() throws Exception { + assertConfigs( + "solrconfig-basic.xml", + "bad-schema-densevector-derived-stored-multivalued.xml", + "useVectorValuesAsStored is not supported for multiValued DenseVectorField: vector"); + } + @Test public void fieldTypeDefinition_nullSimilarityDistance_shouldUseDefaultSimilarityEuclidean() throws Exception { @@ -698,6 +706,92 @@ public void denseVectorFieldOnAtomicUpdate_shouldBeUpdatedCorrectly() throws Exc } } + @Test + public void denseVectorField_useVectorValuesAsStored_shouldReturnVectorsInQueryResults() + throws Exception { + try { + initCore("solrconfig_codec.xml", "schema-densevector-derived-stored.xml"); + assertU( + adoc( + sdoc( + "id", + "0", + "vector", + Arrays.asList(1.1, 2.2, 3.3, 4.4), + "vector_byte_encoding", + Arrays.asList(5, 6, 7, 8), + "string_field", + "test"))); + assertU(commit()); + + assertJQ( + req("q", "id:0", "fl", "*"), + 
"/response/docs/[0]/vector==[1.1,2.2,3.3,4.4]", + "/response/docs/[0]/vector_byte_encoding==[5,6,7,8]", + "/response/docs/[0]/string_field=='test'"); + } finally { + deleteCore(); + } + } + + @Test + public void denseVectorField_useVectorValuesAsStored_shouldReturnVectorsInRealTimeGet() + throws Exception { + try { + initCore("solrconfig_codec.xml", "schema-densevector-derived-stored.xml"); + assertU( + adoc( + sdoc( + "id", + "0", + "vector", + Arrays.asList(1.1, 2.2, 3.3, 4.4), + "vector_byte_encoding", + Arrays.asList(5, 6, 7, 8)))); + assertU(commit()); + + assertJQ( + req("qt", "/get", "id", "0", "fl", "id,vector,vector_byte_encoding"), + "/doc/vector==[1.1,2.2,3.3,4.4]", + "/doc/vector_byte_encoding==[5,6,7,8]"); + } finally { + deleteCore(); + } + } + + @Test + public void denseVectorField_useVectorValuesAsStored_shouldPreserveVectorsAfterAtomicUpdate() + throws Exception { + assumeTrue( + "update log must be enabled for atomic update", + Boolean.getBoolean("solr.index.updatelog.enabled")); + try { + initCore("solrconfig.xml", "schema-densevector-derived-stored.xml"); + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "0"); + doc.addField("vector", Arrays.asList(1.1, 2.2, 3.3, 4.4)); + doc.addField("vector_byte_encoding", Arrays.asList(5, 6, 7, 8)); + doc.addField("string_field", "test"); + + assertU(adoc(doc)); + assertU(commit()); + + SolrInputDocument updateDoc = new SolrInputDocument(); + updateDoc.addField("id", "0"); + updateDoc.addField("string_field", Map.of("set", "other test")); + assertU(adoc(updateDoc)); + assertU(commit()); + + assertJQ( + req("q", "id:0", "fl", "*"), + "/response/docs/[0]/vector==[1.1,2.2,3.3,4.4]", + "/response/docs/[0]/vector_byte_encoding==[5,6,7,8]", + "/response/docs/[0]/string_field=='other test'"); + } finally { + deleteCore(); + } + } + + @Test public void denseVectorByteEncoding_shouldRaiseExceptionWithValuesOutsideBoundaries() throws Exception { diff --git 
a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index 15371bb02673..79f4e0d1d501 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -141,6 +141,19 @@ Please note that the `knnAlgorithm` accepted values may change in future release + Accepted values: `FLOAT32`, `BYTE`. +`useVectorValuesAsStored`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `false` +|=== ++ +(advanced) When `true`, Solr preserves `stored="true"` retrieval semantics for the field while deriving the returned vector value from Lucene's vector index instead of writing a redundant stored-field copy at index time. ++ +This can reduce storage overhead for single-valued dense vector fields. ++ +Accepted values: `true`, `false`. + `hnswM`:: + @@ -176,6 +189,12 @@ Any integer. `DenseVectorField` supports the attributes: `indexed`, `stored`, `multivalued`. +If `useVectorValuesAsStored="true"` is enabled, the field must remain `stored="true"` and must be single-valued. + +When `useVectorValuesAsStored="true"` is enabled, Solr reconstructs the returned vector from Lucene's vector index at fetch time. +This is typically a good fit when you need stored-field compatibility for workflows such as `/get`, reindexing, or atomic updates, but do not routinely return the vector field in latency-sensitive traffic. +If your application commonly uses `fl=*` or otherwise returns the vector field on high-volume request paths, benchmark carefully before enabling this option. 
+ Here's how a `DenseVectorField` should be indexed when single valued: [tabs#densevectorfield-index] From 3bd04a0fed1d79863271d9066b461696aba01f5a Mon Sep 17 00:00:00 2001 From: Sanjay Dutt Date: Sat, 9 May 2026 19:33:10 +0530 Subject: [PATCH 2/4] revert RTG changes --- .../component/RealTimeGetComponent.java | 18 +++++++------ .../apache/solr/search/SolrReturnFields.java | 5 ---- .../solr/schema/DenseVectorFieldTest.java | 25 ------------------- .../pages/dense-vector-search.adoc | 2 +- 4 files changed, 11 insertions(+), 39 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java index 63664621c6ca..d1dc15d9a496 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java @@ -241,6 +241,7 @@ public void process(ResponseBuilder rb) throws IOException { boolean opennedRealtimeSearcher = false; BytesRefBuilder idBytes = new BytesRefBuilder(); + DocValuesIteratorCache reuseDvIters = null; for (String idStr : reqIds.allIds) { fieldType.readableToIndexed(idStr, idBytes); // if _route_ is passed, id is a child doc. 
TODO remove in SOLR-15064 @@ -348,10 +349,14 @@ public void process(ResponseBuilder rb) throws IOException { if (docid < 0) continue; SolrDocumentFetcher docFetcher = searcherInfo.getSearcher().getDocFetcher(); - SolrReturnFields solrReturnFields = (SolrReturnFields) rsp.getReturnFields(); - solrReturnFields.resetFetchOptimizer(); - SolrDocument doc = docFetcher.solrDoc(docid, solrReturnFields); - removeCopyFieldTargets(doc, req.getSchema()); + Document luceneDocument = + docFetcher.doc(docid, rsp.getReturnFields().getLuceneFieldNames()); + SolrDocument doc = toSolrDoc(luceneDocument, core.getLatestSchema()); + if (reuseDvIters == null) { + reuseDvIters = new DocValuesIteratorCache(searcherInfo.getSearcher()); + } + docFetcher.decorateDocValueFields( + doc, docid, docFetcher.getNonStoredDVs(true), reuseDvIters); if (null != transformer) { if (null == resultContext) { // either first pass, or we've re-opened searcher - either way now we setContext @@ -613,10 +618,7 @@ private static SolrDocument mergePartialDocWithFullDocFromIndex( private static SolrDocument fetchSolrDoc( SolrIndexSearcher searcher, int docId, ReturnFields returnFields) throws IOException { final SolrDocumentFetcher docFetcher = searcher.getDocFetcher(); - final SolrReturnFields solrReturnFields = (SolrReturnFields) returnFields; - solrReturnFields.resetFetchOptimizer(); - final SolrDocument solrDoc = docFetcher.solrDoc(docId, solrReturnFields); - removeCopyFieldTargets(solrDoc, searcher.getSchema()); + final SolrDocument solrDoc = docFetcher.solrDoc(docId, (SolrReturnFields) returnFields); final DocTransformer transformer = returnFields.getTransformer(); if (transformer != null) { transformer.setContext( diff --git a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java index 1a63e5f2a846..bf850f4de8b5 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java +++ 
b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java @@ -161,11 +161,6 @@ public RetrieveFieldsOptimizer getFetchOptimizer(Supplier Date: Sat, 23 May 2026 22:00:21 +0530 Subject: [PATCH 3/4] skip list option for Point Field --- .../apache/solr/schema/FieldProperties.java | 4 ++- .../org/apache/solr/schema/FieldType.java | 32 +++++++++++++++++++ .../org/apache/solr/schema/PointField.java | 16 ++++++++-- .../org/apache/solr/schema/SchemaField.java | 7 +++- .../conf/bad-schema-unsupported-skip-list.xml | 23 +++++++++++++ .../solr/collection1/conf/schema_codec.xml | 6 ++++ .../conf/schema_postingsformat.xml | 6 ++++ .../apache/solr/core/TestCodecSupport.java | 22 +++++++++++++ .../solr/rest/schema/TestFieldResource.java | 5 +-- .../solr/schema/BadIndexSchemaTest.java | 6 ++++ .../apache/solr/schema/TestSchemaField.java | 19 +++++++++++ 11 files changed, 140 insertions(+), 6 deletions(-) create mode 100644 solr/core/src/test-files/solr/collection1/conf/bad-schema-unsupported-skip-list.xml diff --git a/solr/core/src/java/org/apache/solr/schema/FieldProperties.java b/solr/core/src/java/org/apache/solr/schema/FieldProperties.java index 91f3caa38e63..ce502a56bcbf 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldProperties.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldProperties.java @@ -52,6 +52,7 @@ public abstract class FieldProperties { protected static final int USE_DOCVALUES_AS_STORED = 0b100000000000000000; protected static final int LARGE_FIELD = 0b1000000000000000000; protected static final int UNINVERTIBLE = 0b10000000000000000000; + protected static final int DOC_VALUES_SKIP_LIST = 0b100000000000000000000; static final String[] propertyNames = { "indexed", @@ -73,7 +74,8 @@ public abstract class FieldProperties { "termPayloads", "useDocValuesAsStored", "large", - "uninvertible" + "uninvertible", + "skipList" }; static final Map propertyMap = new HashMap<>(); diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java 
b/solr/core/src/java/org/apache/solr/schema/FieldType.java index 1452233beee3..24546627ece9 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -42,6 +42,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.DocValuesSkipIndexType; +import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.index.Term; @@ -1148,6 +1150,26 @@ public void checkSchemaField(final SchemaField field) { if (field.hasDocValues()) { checkSupportsDocValues(); } + if (field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) { + if (!field.hasDocValues()) { + throw new SolrException( + ErrorCode.SERVER_ERROR, + "Field " + field.getName() + " cannot use skipList=true without docValues=true"); + } + final DocValuesType docValuesType = getDocValuesTypeForSkipIndex(field); + if (docValuesType != DocValuesType.NUMERIC && docValuesType != DocValuesType.SORTED_NUMERIC) { + throw new SolrException( + ErrorCode.SERVER_ERROR, + "Field " + + field.getName() + + " of type " + + this + + " cannot use skipList=true because it is currently only supported on PointField" + + "-based numeric and date fields; docValues type " + + docValuesType + + " is unsupported"); + } + } if (field.isLarge() && field.multiValued()) { throw new SolrException( ErrorCode.SERVER_ERROR, "Field type " + this + " is 'large'; can't support multiValued"); @@ -1167,6 +1189,16 @@ protected void checkSupportsDocValues() { ErrorCode.SERVER_ERROR, "Field type " + this + " does not support doc values"); } + /** + * Returns the concrete docValues type used for the field when indexing. 
Field types that support + * {@code skipList=true} must override this method so schema validation can reject unsupported + * docValues shapes during core load. The default implementation means the field type does not + * currently support {@code skipList=true}. + */ + protected DocValuesType getDocValuesTypeForSkipIndex(SchemaField field) { + return DocValuesType.NONE; + } + /** * Returns whether this field type should enable docValues by default for schemaVersion >= 1.7. * This should not be enabled for fields that did not have docValues implemented by Solr 9.7, as diff --git a/solr/core/src/java/org/apache/solr/schema/PointField.java b/solr/core/src/java/org/apache/solr/schema/PointField.java index e74e73f13207..ae126579b10c 100644 --- a/solr/core/src/java/org/apache/solr/schema/PointField.java +++ b/solr/core/src/java/org/apache/solr/schema/PointField.java @@ -26,6 +26,7 @@ import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexableField; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.IndexOrDocValuesQuery; @@ -292,7 +293,10 @@ public List createFields(SchemaField sf, Object value) { assert numericValue instanceof Double; bits = Double.doubleToLongBits(numericValue.doubleValue()); } - fields.add(new NumericDocValuesField(sf.getName(), bits)); + fields.add( + sf.hasDocValuesSkipList() + ? 
NumericDocValuesField.indexedField(sf.getName(), bits) + : new NumericDocValuesField(sf.getName(), bits)); } else { // MultiValued if (numericValue instanceof Integer || numericValue instanceof Long) { @@ -303,7 +307,10 @@ public List createFields(SchemaField sf, Object value) { assert numericValue instanceof Double; bits = NumericUtils.doubleToSortableLong(numericValue.doubleValue()); } - fields.add(new SortedNumericDocValuesField(sf.getName(), bits)); + fields.add( + sf.hasDocValuesSkipList() + ? SortedNumericDocValuesField.indexedField(sf.getName(), bits) + : new SortedNumericDocValuesField(sf.getName(), bits)); } } if (sf.stored()) { @@ -314,6 +321,11 @@ public List createFields(SchemaField sf, Object value) { protected abstract StoredField getStoredField(SchemaField sf, Object value); + @Override + protected DocValuesType getDocValuesTypeForSkipIndex(SchemaField field) { + return field.multiValued() ? DocValuesType.SORTED_NUMERIC : DocValuesType.NUMERIC; + } + @Override public SortField getSortField(SchemaField field, boolean top) { return getNumericSort(field, getNumberType(), top); diff --git a/solr/core/src/java/org/apache/solr/schema/SchemaField.java b/solr/core/src/java/org/apache/solr/schema/SchemaField.java index ef3c5559affe..5c186f27e210 100644 --- a/solr/core/src/java/org/apache/solr/schema/SchemaField.java +++ b/solr/core/src/java/org/apache/solr/schema/SchemaField.java @@ -244,6 +244,10 @@ public String getDocValuesFormat() { return (String) args.getOrDefault(DOC_VALUES_FORMAT, type.getDocValuesFormat()); } + public boolean hasDocValuesSkipList() { + return (properties & DOC_VALUES_SKIP_LIST) != 0; + } + /** * Sanity checks that the properties of this field type are plausible for a field that may be used * in sorting, throwing an appropriate exception (including the field name) if it is not. 
@@ -466,6 +470,7 @@ public SimpleOrderedMap getNamedPropertyValues(boolean showDefaults) { properties.add(getPropertyName(REQUIRED), isRequired()); properties.add(getPropertyName(TOKENIZED), isTokenized()); properties.add(getPropertyName(USE_DOCVALUES_AS_STORED), useDocValuesAsStored()); + properties.add(getPropertyName(DOC_VALUES_SKIP_LIST), hasDocValuesSkipList()); // The BINARY property is always false // properties.add(getPropertyName(BINARY), isBinary()); } else { @@ -534,7 +539,7 @@ public DocValuesType docValuesType() { @Override public DocValuesSkipIndexType docValuesSkipIndexType() { - return DocValuesSkipIndexType.NONE; + return hasDocValuesSkipList() ? DocValuesSkipIndexType.RANGE : DocValuesSkipIndexType.NONE; } @Override diff --git a/solr/core/src/test-files/solr/collection1/conf/bad-schema-unsupported-skip-list.xml b/solr/core/src/test-files/solr/collection1/conf/bad-schema-unsupported-skip-list.xml new file mode 100644 index 000000000000..7c9097c65855 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/bad-schema-unsupported-skip-list.xml @@ -0,0 +1,23 @@ + + + + + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml b/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml index c442cdd7bfca..2abc769eaa1c 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml @@ -25,6 +25,8 @@ + + @@ -41,6 +43,8 @@ + + @@ -49,6 +53,8 @@ + + string_f diff --git a/solr/core/src/test-files/solr/collection1/conf/schema_postingsformat.xml b/solr/core/src/test-files/solr/collection1/conf/schema_postingsformat.xml index 32dd7403d28b..00dd2887e7b7 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema_postingsformat.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema_postingsformat.xml @@ -20,6 +20,8 @@ + + @@ -30,6 +32,8 @@ + + @@ -37,5 +41,7 @@ + + diff --git 
a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java index 919777dc3e79..7dd60f504fe0 100644 --- a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java +++ b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java @@ -24,6 +24,8 @@ import org.apache.lucene.codecs.lucene104.Lucene104Codec; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; +import org.apache.lucene.index.DocValuesSkipIndexType; +import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.tests.util.TestUtil; @@ -107,6 +109,26 @@ public void testDynamicFieldsDocValuesFormats() { assertEquals("Asserting", format.getDocValuesFormatForField("bar_direct").getName()); } + public void testDocValuesSkipListPersistsFieldInfo() throws Exception { + assertU(delQ("*:*")); + assertU(commit()); + assertU(add(doc("string_f", "id", "int_skip_f", "7", "long_skip_mv_f", "11"))); + assertU(commit()); + + h.getCore() + .withSearcher( + searcher -> { + FieldInfos fieldInfos = FieldInfos.getMergedFieldInfos(searcher.getIndexReader()); + assertEquals( + DocValuesSkipIndexType.RANGE, + fieldInfos.fieldInfo("int_skip_f").docValuesSkipIndexType()); + assertEquals( + DocValuesSkipIndexType.RANGE, + fieldInfos.fieldInfo("long_skip_mv_f").docValuesSkipIndexType()); + return null; + }); + } + private void reloadCoreAndRecreateIndex() { h.getCoreContainer().reload(h.coreName); assertU(delQ("*:*")); diff --git a/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java b/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java index 5eb97247a399..1e629f2f8f6c 100644 --- a/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java +++ b/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java @@ -25,7 +25,7 @@ public void 
testGetField() { assertQ( "/schema/fields/test_postv?indent=on&wt=xml&showDefaults=true", "count(/response/lst[@name='field']) = 1", - "count(/response/lst[@name='field']/*) = 19", + "count(/response/lst[@name='field']/*) = 20", "/response/lst[@name='field']/str[@name='name'] = 'test_postv'", "/response/lst[@name='field']/str[@name='type'] = 'text'", "/response/lst[@name='field']/bool[@name='indexed'] = 'true'", @@ -44,7 +44,8 @@ public void testGetField() { "/response/lst[@name='field']/bool[@name='large'] = 'false'", "/response/lst[@name='field']/bool[@name='required'] = 'false'", "/response/lst[@name='field']/bool[@name='tokenized'] = 'true'", - "/response/lst[@name='field']/bool[@name='useDocValuesAsStored'] = 'true'"); + "/response/lst[@name='field']/bool[@name='useDocValuesAsStored'] = 'true'", + "/response/lst[@name='field']/bool[@name='skipList'] = 'false'"); } @Test diff --git a/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java b/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java index 3f3e372399ae..13fa19810f0f 100644 --- a/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java +++ b/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java @@ -121,6 +121,12 @@ public void testDocValuesUnsupported() throws Exception { doTest("bad-schema-unsupported-docValues.xml", "does not support doc values"); } + public void testDocValuesSkipListUnsupported() throws Exception { + doTest( + "bad-schema-unsupported-skip-list.xml", + "currently only supported on PointField-based numeric and date fields"); + } + public void testRootTypeMissmatchWithUniqueKey() throws Exception { doTest( "bad-schema-uniquekey-diff-type-root.xml", diff --git a/solr/core/src/test/org/apache/solr/schema/TestSchemaField.java b/solr/core/src/test/org/apache/solr/schema/TestSchemaField.java index e404c48a238a..f4553e44bc0d 100644 --- a/solr/core/src/test/org/apache/solr/schema/TestSchemaField.java +++ 
b/solr/core/src/test/org/apache/solr/schema/TestSchemaField.java @@ -75,6 +75,8 @@ public void testFields() { assertFieldFormats("str_none_asserting_f", null, "Asserting"); assertFieldFormats("str_standard_asserting_f", "Lucene84", "Asserting"); + assertFieldHasSkipList("int_skip_f", true); + assertFieldHasSkipList("long_skip_mv_f", true); } public void testDynamicFields() { @@ -84,6 +86,23 @@ public void testDynamicFields() { assertFieldFormats("any_asserting", null, "Asserting"); assertFieldFormats("any_simple", "Direct", "Lucene80"); + assertFieldHasSkipList("any_skip_i", true); + assertFieldHasSkipList("any_skip_l", true); + } + + private void assertFieldHasSkipList(String fieldName, boolean expectedSkipList) { + SchemaField field = h.getCore().getLatestSchema().getField(fieldName); + assertNotNull("Field " + fieldName + " not found - schema got changed?", field); + final String skipListPropertyName = + FieldProperties.getPropertyName(FieldProperties.DOC_VALUES_SKIP_LIST); + assertEquals( + "Field " + field.getName() + " wrong " + skipListPropertyName + " value", + expectedSkipList, + field.hasDocValuesSkipList()); + assertEquals( + "Field " + field.getName() + " wrong schema property value for " + skipListPropertyName, + expectedSkipList, + field.getNamedPropertyValues(true).get(skipListPropertyName)); } private void assertFieldFormats( From df0ba86e44a0ed9f5e53b5ef4b15859c5e3cdec7 Mon Sep 17 00:00:00 2001 From: Sanjay Dutt Date: Sat, 23 May 2026 23:45:37 +0530 Subject: [PATCH 4/4] remove unrelated changes --- .../apache/solr/schema/DenseVectorField.java | 28 +--- .../solr/search/SolrDocumentFetcher.java | 141 +----------------- ...densevector-derived-stored-multivalued.xml | 22 --- .../schema-densevector-derived-stored.xml | 54 ------- .../solr/schema/DenseVectorFieldTest.java | 69 --------- .../pages/dense-vector-search.adoc | 19 --- 6 files changed, 6 insertions(+), 327 deletions(-) delete mode 100644 
solr/core/src/test-files/solr/collection1/conf/bad-schema-densevector-derived-stored-multivalued.xml delete mode 100644 solr/core/src/test-files/solr/collection1/conf/schema-densevector-derived-stored.xml diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 503f82815d70..f29714d5b1ee 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -78,7 +78,6 @@ public class DenseVectorField extends FloatPointField { static final String HNSW_M = "hnswM"; static final String HNSW_EF_CONSTRUCTION = "hnswEfConstruction"; static final String VECTOR_ENCODING = "vectorEncoding"; - static final String USE_VECTOR_VALUES_AS_STORED = "useVectorValuesAsStored"; static final VectorEncoding DEFAULT_VECTOR_ENCODING = VectorEncoding.FLOAT32; static final String KNN_SIMILARITY_FUNCTION = "similarityFunction"; static final VectorSimilarityFunction DEFAULT_SIMILARITY = VectorSimilarityFunction.EUCLIDEAN; @@ -118,8 +117,6 @@ public class DenseVectorField extends FloatPointField { */ private VectorEncoding vectorEncoding; - private boolean useVectorValuesAsStored; - private int cuvsWriterThreads; private int cuvsIntGraphDegree; private int cuvsGraphDegree; @@ -190,10 +187,6 @@ public void init(IndexSchema schema, Map args) { .orElse(DEFAULT_VECTOR_ENCODING); args.remove(VECTOR_ENCODING); - this.useVectorValuesAsStored = - ofNullable(args.get(USE_VECTOR_VALUES_AS_STORED)).map(Boolean::parseBoolean).orElse(false); - args.remove(USE_VECTOR_VALUES_AS_STORED); - this.hnswM = ofNullable(args.get(HNSW_M)) .map(Integer::parseInt) @@ -316,10 +309,6 @@ public int getCuvsHnswEfConstruction() { return cuvsHnswEfConstruction; } - public boolean useVectorValuesAsStored() { - return useVectorValuesAsStored; - } - @Override protected boolean enableDocValuesByDefault() { return false; @@ -335,21 +324,6 @@ public void 
checkSchemaField(final SchemaField field) throws SolrException { getClass().getSimpleName() + " fields can not have docValues: " + field.getName()); } - if (useVectorValuesAsStored) { - if (!field.stored()) { - throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, - USE_VECTOR_VALUES_AS_STORED + " requires stored=true for field " + field.getName()); - } - if (field.multiValued()) { - throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, - USE_VECTOR_VALUES_AS_STORED - + " is not supported for multiValued DenseVectorField: " - + field.getName()); - } - } - switch (vectorEncoding) { case FLOAT32: if (dimension > KnnVectorsFormat.DEFAULT_MAX_DIMENSIONS) { @@ -386,7 +360,7 @@ public List createFields(SchemaField field, Object value) { if (field.indexed()) { fields.add(createField(field, vectorBuilder)); } - if (field.stored() && !useVectorValuesAsStored) { + if (field.stored()) { switch (vectorEncoding) { case FLOAT32: fields.ensureCapacity(vectorBuilder.getFloatVector().length + 1); diff --git a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java index 02f87cf1cc9c..3366d5241ef5 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java @@ -43,15 +43,12 @@ import org.apache.lucene.document.StoredValue; import org.apache.lucene.document.TextField; import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; -import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.LeafReader; import 
org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; @@ -71,7 +68,6 @@ import org.apache.solr.response.DocsStreamer; import org.apache.solr.response.ResultContext; import org.apache.solr.schema.BoolField; -import org.apache.solr.schema.DenseVectorField; import org.apache.solr.schema.EnumFieldType; import org.apache.solr.schema.LatLonPointSpatialField; import org.apache.solr.schema.NumberType; @@ -99,7 +95,6 @@ public class SolrDocumentFetcher { private final Set allStored; private final Set dvsCanSubstituteStored; - private final Set derivedStoredVectorFields; /** Contains the names/patterns of all docValues=true,stored=false fields in the schema. */ private final Set allNonStoredDVs; @@ -138,7 +133,6 @@ private SolrDocumentFetcher(SolrDocumentFetcher template, StoredFields storedFie this.largeFields = template.largeFields; this.dvsCanSubstituteStored = template.dvsCanSubstituteStored; this.allStored = template.allStored; - this.derivedStoredVectorFields = template.derivedStoredVectorFields; this.storedHighlightFieldNames = template.indexedFieldNames; this.indexedFieldNames = template.indexedFieldNames; this.storedFields = storedFields; @@ -175,15 +169,6 @@ protected SolrDocumentFetcher clone() { final Set storedLargeFields = new HashSet<>(); final Set dvsCanSubstituteStored = new HashSet<>(); final Set allStoreds = new HashSet<>(); - final Set derivedStoredVectors = new HashSet<>(); - - for (SchemaField schemaField : searcher.getSchema().getFields().values()) { - if (schemaField.getType() instanceof DenseVectorField vectorField - && schemaField.stored() - && vectorField.useVectorValuesAsStored()) { - derivedStoredVectors.add(schemaField.getName()); - } - } // can find materialized dynamic fields, unlike using the Solr IndexSchema. 
for (FieldInfo fieldInfo : searcher.getFieldInfos()) { @@ -197,11 +182,6 @@ protected SolrDocumentFetcher clone() { if (schemaField.stored()) { allStoreds.add(fieldInfo.name); } - if (schemaField.getType() instanceof DenseVectorField vectorField - && schemaField.stored() - && vectorField.useVectorValuesAsStored()) { - derivedStoredVectors.add(fieldInfo.name); - } if (!schemaField.stored() && schemaField.hasDocValues()) { if (schemaField.useDocValuesAsStored()) { nonStoredDVsUsedAsStored.add(fieldInfo.name); @@ -223,7 +203,6 @@ protected SolrDocumentFetcher clone() { this.largeFields = Collections.unmodifiableSet(storedLargeFields); this.dvsCanSubstituteStored = Collections.unmodifiableSet(dvsCanSubstituteStored); this.allStored = Collections.unmodifiableSet(allStoreds); - this.derivedStoredVectorFields = Collections.unmodifiableSet(derivedStoredVectors); this.storedFields = null; // template docFetcher should throw NPE if used directly this.storedHighlightFieldNames = new Collection[1]; this.indexedFieldNames = new Collection[1]; @@ -789,7 +768,7 @@ public Set getNonStoredDVsWithoutCopyTargets() { } /** - * Moved as a private class here, we consider it an implementation detail. It should not be + * Moved as a private class here, we consider it an impelmentation detail. It should not be * exposed outside of this class. * *

This class is in charge of insuring that SolrDocuments can have their fields populated @@ -801,7 +780,6 @@ class RetrieveFieldsOptimizer { private final Set storedFields; // always non null private final Set dvFields; - private final Set vectorFields; private final SolrReturnFields solrReturnFields; @@ -810,7 +788,6 @@ class RetrieveFieldsOptimizer { RetrieveFieldsOptimizer(SolrReturnFields solrReturnFields) { this.storedFields = calcStoredFieldsForReturn(solrReturnFields); this.dvFields = calcDocValueFieldsForReturn(solrReturnFields); - this.vectorFields = calcDerivedVectorFieldsForReturn(solrReturnFields); this.solrReturnFields = solrReturnFields; if (storedFields != null && dvsCanSubstituteStored.containsAll(storedFields)) { @@ -853,27 +830,11 @@ private Set calcStoredFieldsForReturn(ReturnFields returnFields) { if (returnFields.wantsAllFields()) { return null; } else if (returnFields.hasPatternMatching()) { - if (fnames == null) { - return null; + for (String s : getAllStored()) { + if (returnFields.wantsField(s)) { + storedFields.add(s); + } } - storedFields.addAll(fnames); - storedFields.removeIf( - (String name) -> { - SchemaField schemaField = searcher.getSchema().getFieldOrNull(name); - if (schemaField == null) { - // Get it from the stored fields if, for some reason, we can't get the schema. - return false; - } - if (schemaField.stored() && schemaField.multiValued()) { - // must return multivalued fields from stored data if possible. - return false; - } - if (schemaField.stored() == false) { - // if it's not stored, no choice but to return from DV. 
- return true; - } - return false; - }); } else if (fnames != null) { storedFields.addAll(fnames); storedFields.removeIf( @@ -932,96 +893,6 @@ private Set calcDocValueFieldsForReturn(ReturnFields returnFields) { return result; } - private Set calcDerivedVectorFieldsForReturn(ReturnFields returnFields) { - if (derivedStoredVectorFields.isEmpty()) { - return Set.of(); - } - - final Set result = new HashSet<>(); - if (returnFields.wantsAllFields()) { - result.addAll(derivedStoredVectorFields); - } else if (returnFields.hasPatternMatching()) { - for (String field : derivedStoredVectorFields) { - if (returnFields.wantsField(field)) { - result.add(field); - } - } - } else { - Set fnames = returnFields.getLuceneFieldNames(); - if (fnames != null) { - result.addAll(fnames); - result.retainAll(derivedStoredVectorFields); - } else { - for (String field : derivedStoredVectorFields) { - if (returnFields.wantsField(field)) { - result.add(field); - } - } - } - } - return result; - } - - private void decorateDerivedVectorFields(SolrDocument sdoc, int luceneDocId) - throws IOException { - if (vectorFields.isEmpty()) { - return; - } - - final List leafContexts = searcher.getLeafContexts(); - final int subIndex = ReaderUtil.subIndex(luceneDocId, leafContexts); - final LeafReaderContext leafReaderContext = leafContexts.get(subIndex); - final LeafReader leafReader = leafReaderContext.reader(); - final int localId = luceneDocId - leafReaderContext.docBase; - - for (String field : vectorFields) { - if (sdoc.containsKey(field)) { - continue; - } - - SchemaField schemaField = searcher.getSchema().getFieldOrNull(field); - if (schemaField == null - || !(schemaField.getType() instanceof DenseVectorField vectorField)) { - continue; - } - - switch (vectorField.getVectorEncoding()) { - case FLOAT32: - FloatVectorValues floatVectorValues = leafReader.getFloatVectorValues(field); - if (floatVectorValues == null) { - continue; - } - KnnVectorValues.DocIndexIterator floatIterator = 
floatVectorValues.iterator(); - if (floatIterator.advance(localId) != localId) { - continue; - } - float[] floatVector = floatVectorValues.vectorValue(floatIterator.index()); - List floatValues = new ArrayList<>(floatVector.length); - for (float value : floatVector) { - floatValues.add(value); - } - sdoc.setField(field, floatValues); - break; - case BYTE: - ByteVectorValues byteVectorValues = leafReader.getByteVectorValues(field); - if (byteVectorValues == null) { - continue; - } - KnnVectorValues.DocIndexIterator byteIterator = byteVectorValues.iterator(); - if (byteIterator.advance(localId) != localId) { - continue; - } - byte[] byteVector = byteVectorValues.vectorValue(byteIterator.index()); - List byteValues = new ArrayList<>(byteVector.length); - for (byte value : byteVector) { - byteValues.add((int) value); - } - sdoc.setField(field, byteValues); - break; - } - } - } - private SolrDocument getSolrDoc(int luceneDocId) { SolrDocument sdoc = null; @@ -1032,7 +903,6 @@ private SolrDocument getSolrDoc(int luceneDocId) { sdoc = DocsStreamer.convertLuceneDocToSolrDoc(doc, searcher.getSchema(), getReturnFields()); if (returnDVFields() == false) { - decorateDerivedVectorFields(sdoc, luceneDocId); solrReturnFields.setFieldSources(SolrReturnFields.FIELD_SOURCES.ALL_FROM_STORED); return sdoc; } else { @@ -1048,7 +918,6 @@ private SolrDocument getSolrDoc(int luceneDocId) { if (returnDVFields()) { decorateDocValueFields(sdoc, luceneDocId, getDvFields(), reuseDvIters); } - decorateDerivedVectorFields(sdoc, luceneDocId); } catch (IOException e) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, diff --git a/solr/core/src/test-files/solr/collection1/conf/bad-schema-densevector-derived-stored-multivalued.xml b/solr/core/src/test-files/solr/collection1/conf/bad-schema-densevector-derived-stored-multivalued.xml deleted file mode 100644 index f7156a3179e8..000000000000 --- 
a/solr/core/src/test-files/solr/collection1/conf/bad-schema-densevector-derived-stored-multivalued.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-densevector-derived-stored.xml b/solr/core/src/test-files/solr/collection1/conf/schema-densevector-derived-stored.xml deleted file mode 100644 index c90a4a58effb..000000000000 --- a/solr/core/src/test-files/solr/collection1/conf/schema-densevector-derived-stored.xml +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - id - diff --git a/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java b/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java index 08c4900dbccd..f84eaf40ef0b 100644 --- a/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java +++ b/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java @@ -93,14 +93,6 @@ public void fieldDefinition_docValues_shouldThrowException() throws Exception { "DenseVectorField fields can not have docValues: vector"); } - @Test - public void fieldDefinition_derivedStoredMultiValued_shouldThrowException() throws Exception { - assertConfigs( - "solrconfig-basic.xml", - "bad-schema-densevector-derived-stored-multivalued.xml", - "useVectorValuesAsStored is not supported for multiValued DenseVectorField: vector"); - } - @Test public void fieldTypeDefinition_nullSimilarityDistance_shouldUseDefaultSimilarityEuclidean() throws Exception { @@ -706,67 +698,6 @@ public void denseVectorFieldOnAtomicUpdate_shouldBeUpdatedCorrectly() throws Exc } } - @Test - public void denseVectorField_useVectorValuesAsStored_shouldReturnVectorsInQueryResults() - throws Exception { - try { - initCore("solrconfig_codec.xml", "schema-densevector-derived-stored.xml"); - assertU( - adoc( - sdoc( - "id", - "0", - "vector", - Arrays.asList(1.1, 2.2, 3.3, 4.4), - "vector_byte_encoding", - Arrays.asList(5, 6, 7, 8), - "string_field", - "test"))); 
- assertU(commit()); - - assertJQ( - req("q", "id:0", "fl", "*"), - "/response/docs/[0]/vector==[1.1,2.2,3.3,4.4]", - "/response/docs/[0]/vector_byte_encoding==[5,6,7,8]", - "/response/docs/[0]/string_field=='test'"); - } finally { - deleteCore(); - } - } - - @Test - public void denseVectorField_useVectorValuesAsStored_shouldPreserveVectorsAfterAtomicUpdate() - throws Exception { - assumeTrue( - "update log must be enabled for atomic update", - Boolean.getBoolean(System.getProperty("solr.index.updatelog.enabled"))); - try { - initCore("solrconfig.xml", "schema-densevector-derived-stored.xml"); - SolrInputDocument doc = new SolrInputDocument(); - doc.addField("id", "0"); - doc.addField("vector", Arrays.asList(1.1, 2.2, 3.3, 4.4)); - doc.addField("vector_byte_encoding", Arrays.asList(5, 6, 7, 8)); - doc.addField("string_field", "test"); - - assertU(adoc(doc)); - assertU(commit()); - - SolrInputDocument updateDoc = new SolrInputDocument(); - updateDoc.addField("id", "0"); - updateDoc.addField("string_field", Map.of("set", "other test")); - assertU(adoc(updateDoc)); - assertU(commit()); - - assertJQ( - req("q", "id:0", "fl", "*"), - "/response/docs/[0]/vector==[1.1,2.2,3.3,4.4]", - "/response/docs/[0]/vector_byte_encoding==[5,6,7,8]", - "/response/docs/[0]/string_field=='other test'"); - } finally { - deleteCore(); - } - } - @Test public void denseVectorByteEncoding_shouldRaiseExceptionWithValuesOutsideBoundaries() throws Exception { diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index b931ae8b0c6f..2681cc598409 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -141,19 +141,6 @@ Please note that the `knnAlgorithm` accepted values may change in future release + Accepted values: `FLOAT32`, `BYTE`. 
-`useVectorValuesAsStored`:: -+ -[%autowidth,frame=none] -|=== -|Optional |Default: `false` -|=== -+ -(advanced) When `true`, Solr preserves `stored="true"` retrieval semantics for the field while deriving the returned vector value from Lucene's vector index instead of writing a redundant stored-field copy at index time. -+ -This can reduce storage overhead for single-valued dense vector fields. -+ -Accepted values: `true`, `false`. - `hnswM`:: + @@ -189,12 +176,6 @@ Any integer. `DenseVectorField` supports the attributes: `indexed`, `stored`, `multivalued`. -If `useVectorValuesAsStored="true"` is enabled, the field must remain `stored="true"` and must be single-valued. - -When `useVectorValuesAsStored="true"` is enabled, Solr reconstructs the returned vector from Lucene's vector index at fetch time. -This is typically a good fit when you do not routinely return the vector field in latency-sensitive traffic, but still want to reduce duplicate stored vector bytes for standard query-time fetches. -If your application commonly uses `fl=*` or otherwise returns the vector field on high-volume request paths, benchmark carefully before enabling this option. - Here's how a `DenseVectorField` should be indexed when single valued: [tabs#densevectorfield-index]