diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index e3d07b1cbca7..d5145fc664dc 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -19,6 +19,8 @@ import static org.apache.lucene.index.IndexOptions.DOCS; import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS; import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; +import static org.apache.solr.common.params.CommonParams.DISTRIB; +import static org.apache.solr.common.params.CommonParams.PATH; import java.io.IOException; import java.lang.invoke.MethodHandles; @@ -28,11 +30,13 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; +import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; @@ -67,15 +71,23 @@ import org.apache.lucene.util.PriorityQueue; import org.apache.solr.analysis.TokenizerChain; import org.apache.solr.client.api.model.CoreStatusResponse; +import org.apache.solr.client.solrj.response.LukeResponse; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.luke.FieldFlag; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.core.SolrCore; import org.apache.solr.handler.RequestHandlerBase; import org.apache.solr.handler.api.V2ApiUtils; +import org.apache.solr.handler.component.ResponseBuilder; +import 
org.apache.solr.handler.component.ShardHandler; +import org.apache.solr.handler.component.ShardHandlerFactory; +import org.apache.solr.handler.component.ShardRequest; +import org.apache.solr.handler.component.ShardResponse; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.CopyField; @@ -85,6 +97,7 @@ import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.security.AuthorizationContext; import org.apache.solr.update.SolrIndexWriter; +import org.apache.solr.util.plugin.SolrCoreAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -97,7 +110,7 @@ * @see SegmentsInfoRequestHandler * @since solr 1.2 */ -public class LukeRequestHandler extends RequestHandlerBase { +public class LukeRequestHandler extends RequestHandlerBase implements SolrCoreAware { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final String NUMTERMS = "numTerms"; @@ -108,6 +121,36 @@ public class LukeRequestHandler extends RequestHandlerBase { static final int HIST_ARRAY_SIZE = 33; + // Response section keys + private static final String RSP_INDEX = "index"; + private static final String RSP_FIELDS = "fields"; + private static final String RSP_SCHEMA = "schema"; + private static final String RSP_INFO = "info"; + private static final String RSP_DOC = "doc"; + private static final String RSP_SHARDS = "shards"; + + // Field-level keys + private static final String KEY_NUM_DOCS = "numDocs"; + private static final String KEY_MAX_DOC = "maxDoc"; + private static final String KEY_DELETED_DOCS = "deletedDocs"; + private static final String KEY_SEGMENT_COUNT = "segmentCount"; + private static final String KEY_TYPE = "type"; + private static final String KEY_SCHEMA_FLAGS = "schema"; + private static final String KEY_DOCS = "docs"; + private static final String KEY_DOCS_AS_LONG = "docsAsLong"; + private static final String KEY_DISTINCT = 
"distinct"; + private static final String KEY_TOP_TERMS = "topTerms"; + private static final String KEY_DYNAMIC_BASE = "dynamicBase"; + private static final String KEY_INDEX_FLAGS = "index"; + private static final String KEY_HISTOGRAM = "histogram"; + + private ShardHandlerFactory shardHandlerFactory; + + @Override + public void inform(SolrCore core) { + this.shardHandlerFactory = core.getCoreContainer().getShardHandlerFactory(); + } + @Override public Name getPermissionName(AuthorizationContext request) { return Name.READ_PERM; @@ -123,7 +166,7 @@ public static ShowStyle get(String v) { if (v == null) return null; if ("schema".equalsIgnoreCase(v)) return SCHEMA; if ("index".equalsIgnoreCase(v)) return INDEX; - if ("doc".equalsIgnoreCase(v)) return DOC; + if (RSP_DOC.equalsIgnoreCase(v)) return DOC; if ("all".equalsIgnoreCase(v)) return ALL; throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown Show Style: " + v); } @@ -131,16 +174,23 @@ public static ShowStyle get(String v) { @Override public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { + SolrParams params = req.getParams(); + + if (params.getBool(DISTRIB, false) + && req.getCoreContainer().isZooKeeperAware() + && handleDistributed(req, rsp)) { + return; + } + IndexSchema schema = req.getSchema(); SolrIndexSearcher searcher = req.getSearcher(); DirectoryReader reader = searcher.getIndexReader(); - SolrParams params = req.getParams(); ShowStyle style = ShowStyle.get(params.get("show")); // If no doc is given, show all fields and top terms final var indexVals = new SimpleOrderedMap<>(); V2ApiUtils.squashIntoNamedList(indexVals, getIndexInfo(reader)); - rsp.add("index", indexVals); + rsp.add(RSP_INDEX, indexVals); if (ShowStyle.INDEX == style) { return; // that's all we need @@ -153,14 +203,10 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw String v = uniqueKey.getType().toInternal(params.get(ID)); Term t = new Term(uniqueKey.getName(), 
v); docId = searcher.getFirstMatch(t); - if (docId < 0) { - throw new SolrException( - SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + params.get(ID)); - } } // Read the document from the index - if (docId != null) { + if (docId != null && docId > -1) { if (style != null && style != ShowStyle.DOC) { throw new SolrException(ErrorCode.BAD_REQUEST, "missing doc param for doc style"); } @@ -179,11 +225,11 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw docinfo.add("docId", docId); docinfo.add("lucene", info); docinfo.add("solr", doc); - rsp.add("doc", docinfo); + rsp.add(RSP_DOC, docinfo); } else if (ShowStyle.SCHEMA == style) { - rsp.add("schema", getSchemaInfo(req.getSchema())); + rsp.add(RSP_SCHEMA, getSchemaInfo(req.getSchema())); } else { - rsp.add("fields", getIndexedFieldsInfo(req)); + rsp.add(RSP_FIELDS, getIndexedFieldsInfo(req)); } // Add some generally helpful information @@ -192,8 +238,343 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw info.add( "NOTE", "Document Frequency (df) is not updated when a document is marked for deletion. df values include deleted documents."); - rsp.add("info", info); + rsp.add(RSP_INFO, info); + rsp.setHttpCaching(false); + } + + /** Per-field accumulation state across shards: merged response data and field validation. 
*/ + private static class MergedFieldData { + final SimpleOrderedMap merged = new SimpleOrderedMap<>(); + final String originalShardAddr; + final LukeResponse.FieldInfo originalFieldInfo; + private Object indexFlags; + private String indexFlagsShardAddr; + + MergedFieldData(String shardAddr, LukeResponse.FieldInfo fieldInfo) { + this.originalShardAddr = shardAddr; + this.originalFieldInfo = fieldInfo; + Object flags = fieldInfo.getExtras().get(KEY_INDEX_FLAGS); + if (flags != null) { + this.indexFlags = flags; + this.indexFlagsShardAddr = shardAddr; + } + } + } + + private static class ShardData { + final String shardAddr; + final Map shardFieldInfo; + private NamedList indexInfo; + private SimpleOrderedMap detailedFields; + + ShardData(String shardAddr, Map shardFieldInfo) { + this.shardAddr = shardAddr; + this.shardFieldInfo = shardFieldInfo; + } + + void setIndexInfo(NamedList indexInfo) { + this.indexInfo = indexInfo; + } + + void addDetailedFieldInfo(String fieldName, SimpleOrderedMap fieldStats) { + if (detailedFields == null) { + detailedFields = new SimpleOrderedMap<>(); + } + detailedFields.add(fieldName, fieldStats); + } + + SimpleOrderedMap toResponseEntry() { + SimpleOrderedMap entry = new SimpleOrderedMap<>(); + if (indexInfo != null) { + entry.add(RSP_INDEX, indexInfo); + } + if (detailedFields != null) { + entry.add(RSP_FIELDS, detailedFields); + } + return entry; + } + } + + /** + * @return true if the request was handled in distributed mode, false if prepDistributed + * short-circuited (e.g. single-shard collection) and the caller should fall through to local + * logic. + */ + private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) { + SolrParams reqParams = req.getParams(); + + // docId is a Lucene-internal integer, not meaningful across shards + if (reqParams.getInt(DOC_ID) != null) { + throw new SolrException( + ErrorCode.BAD_REQUEST, + "docId parameter is not supported in distributed mode." 
+ + " Use the id parameter to look up documents by their Solr unique key."); + } + + ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); + ResponseBuilder rb = new ResponseBuilder(req, rsp, Collections.emptyList()); + shardHandler.prepDistributed(rb); + + String[] shards = rb.shards; + if (shards == null || shards.length == 0) { + return false; + } + + ShardRequest sreq = new ShardRequest(); + sreq.shards = shards; + sreq.actualShards = shards; + sreq.responses = new ArrayList<>(shards.length); + + String reqPath = (String) req.getContext().get(PATH); + + for (String shard : shards) { + ModifiableSolrParams params = new ModifiableSolrParams(reqParams); + params.set(CommonParams.QT, reqPath); + ShardHandler.setShardAttributesToParams(params, sreq.purpose); + shardHandler.submit(sreq, shard, params); + } + + ShardResponse lastSrsp = shardHandler.takeCompletedOrError(); + if (lastSrsp == null) { + throw new SolrException(ErrorCode.SERVER_ERROR, "No responses received from shards"); + } + List responses = sreq.responses; + for (ShardResponse srsp : responses) { + if (srsp.getException() != null) { + shardHandler.cancelAll(); + if (srsp.getException() instanceof SolrException) { + throw (SolrException) srsp.getException(); + } + throw new SolrException(ErrorCode.SERVER_ERROR, srsp.getException()); + } + } + + mergeDistributedResponses(rsp, responses); rsp.setHttpCaching(false); + return true; + } + + private static String shardAddress(ShardResponse srsp) { + return srsp.getShardAddress() != null ? 
srsp.getShardAddress() : srsp.getShard(); + } + + private void mergeDistributedResponses(SolrQueryResponse rsp, List responses) { + + if (!responses.isEmpty()) { + ShardResponse firstRsp = responses.getFirst(); + NamedList firstShardRsp = firstRsp.getSolrResponse().getResponse(); + if (firstShardRsp == null) { + throw new SolrException( + ErrorCode.SERVER_ERROR, + "Unexpected empty response from shard: " + shardAddress(firstRsp)); + } + Object schema = firstShardRsp.get(RSP_SCHEMA); + if (schema != null) { + rsp.add(RSP_SCHEMA, schema); + } + Object info = firstShardRsp.get(RSP_INFO); + if (info != null) { + rsp.add(RSP_INFO, info); + } + } + + long totalNumDocs = 0; + int totalMaxDoc = 0; + long totalDeletedDocs = 0; + int totalSegmentCount = 0; + Map mergedFields = new HashMap<>(); + String firstDocShard = null; + Object firstDoc = null; + List shardDataList = new ArrayList<>(); + + for (ShardResponse srsp : responses) { + NamedList shardRsp = srsp.getSolrResponse().getResponse(); + LukeResponse lukeRsp = new LukeResponse(); + lukeRsp.setResponse(shardRsp); + // Only process field info if the shard explicitly included it in its response. + // LukeResponse.getFieldInfo() falls back to schema.fields which has incomplete data. + Map fieldInfo = + shardRsp.get(RSP_FIELDS) != null ? lukeRsp.getFieldInfo() : null; + ShardData shardData = new ShardData(shardAddress(srsp), fieldInfo); + + NamedList shardIndex = lukeRsp.getIndexInfo(); + if (shardIndex != null) { + totalNumDocs += Optional.ofNullable(lukeRsp.getNumDocsAsLong()).orElse(0L); + totalMaxDoc = Math.max(totalMaxDoc, Optional.ofNullable(lukeRsp.getMaxDoc()).orElse(0)); + totalDeletedDocs += Optional.ofNullable(lukeRsp.getDeletedDocsAsLong()).orElse(0L); + Number segCount = (Number) shardIndex.get(KEY_SEGMENT_COUNT); + totalSegmentCount += segCount != null ? 
segCount.intValue() : 0; + + shardData.setIndexInfo(shardIndex); + } + + processShardFields(shardData, mergedFields); + Object doc = shardRsp.get(RSP_DOC); + if (doc != null) { + if (firstDoc != null) { + throw new SolrException( + ErrorCode.SERVER_ERROR, + "Solr Id of document " + + firstDoc + + " found on multiple shards (" + + firstDocShard + + " and " + + shardAddress(srsp) + + "). The index is corrupt: unique key constraint violated."); + } + firstDoc = doc; + firstDocShard = shardAddress(srsp); + } + shardDataList.add(shardData); + } + + SimpleOrderedMap shardsInfo = new SimpleOrderedMap<>(); + for (ShardData sd : shardDataList) { + SimpleOrderedMap entry = sd.toResponseEntry(); + if (!entry.isEmpty()) { + shardsInfo.add(sd.shardAddr, entry); + } + } + + SimpleOrderedMap mergedIndex = new SimpleOrderedMap<>(); + mergedIndex.add(KEY_NUM_DOCS, totalNumDocs); + mergedIndex.add(KEY_MAX_DOC, totalMaxDoc); + mergedIndex.add(KEY_DELETED_DOCS, totalDeletedDocs); + mergedIndex.add(KEY_SEGMENT_COUNT, totalSegmentCount); + rsp.add(RSP_INDEX, mergedIndex); + + if (firstDoc != null) { + rsp.add(RSP_DOC, firstDoc); + } + if (!mergedFields.isEmpty()) { + SimpleOrderedMap mergedFieldsNL = new SimpleOrderedMap<>(); + for (Map.Entry entry : mergedFields.entrySet()) { + mergedFieldsNL.add(entry.getKey(), entry.getValue().merged); + } + rsp.add(RSP_FIELDS, mergedFieldsNL); + } + + rsp.add(RSP_SHARDS, shardsInfo); + } + + private void processShardFields(ShardData shardData, Map mergedFields) { + if (shardData.shardFieldInfo == null) { + return; + } + for (Map.Entry entry : shardData.shardFieldInfo.entrySet()) { + String fieldName = entry.getKey(); + LukeResponse.FieldInfo fi = entry.getValue(); + + mergeShardField(shardData.shardAddr, fi, mergedFields); + + // Detailed stats — kept per-shard, not merged + NamedList topTerms = fi.getTopTerms(); + if (topTerms != null) { + SimpleOrderedMap detailedFieldInfo = new SimpleOrderedMap<>(); + detailedFieldInfo.add(KEY_TOP_TERMS, 
topTerms); + detailedFieldInfo.add(KEY_HISTOGRAM, fi.getExtras().get(KEY_HISTOGRAM)); + detailedFieldInfo.add(KEY_DISTINCT, fi.getDistinct()); + shardData.addDetailedFieldInfo(fieldName, detailedFieldInfo); + } + } + } + + private void mergeShardField( + String shardAddr, LukeResponse.FieldInfo fi, Map mergedFields) { + + String fieldName = fi.getName(); + + MergedFieldData fieldData = mergedFields.get(fieldName); + if (fieldData == null) { + fieldData = new MergedFieldData(shardAddr, fi); + mergedFields.put(fieldName, fieldData); + + // First shard to report this field: populate merged with schema-derived attrs + fieldData.merged.add(KEY_TYPE, fi.getType()); + fieldData.merged.add(KEY_SCHEMA_FLAGS, fi.getSchema()); + Object dynBase = fi.getExtras().get(KEY_DYNAMIC_BASE); + if (dynBase != null) { + fieldData.merged.add(KEY_DYNAMIC_BASE, dynBase); + } + if (fieldData.indexFlags != null) { + fieldData.merged.add(KEY_INDEX_FLAGS, fieldData.indexFlags); + } + } else { + // Subsequent shards: validate consistency + validateFieldAttr( + fieldName, + KEY_TYPE, + fi.getType(), + fieldData.originalFieldInfo.getType(), + shardAddr, + fieldData.originalShardAddr); + validateFieldAttr( + fieldName, + KEY_SCHEMA_FLAGS, + fi.getSchema(), + fieldData.originalFieldInfo.getSchema(), + shardAddr, + fieldData.originalShardAddr); + validateFieldAttr( + fieldName, + KEY_DYNAMIC_BASE, + fi.getExtras().get(KEY_DYNAMIC_BASE), + fieldData.originalFieldInfo.getExtras().get(KEY_DYNAMIC_BASE), + shardAddr, + fieldData.originalShardAddr); + + Object indexFlags = fi.getExtras().get(KEY_INDEX_FLAGS); + if (indexFlags != null) { + if (fieldData.indexFlags == null) { + fieldData.indexFlags = indexFlags; + fieldData.indexFlagsShardAddr = shardAddr; + fieldData.merged.add(KEY_INDEX_FLAGS, indexFlags); + } else { + validateFieldAttr( + fieldName, + KEY_INDEX_FLAGS, + indexFlags, + fieldData.indexFlags, + shardAddr, + fieldData.indexFlagsShardAddr); + } + } + } + + Long docsAsLong = 
fi.getDocsAsLong(); + if (docsAsLong != null) { + fieldData.merged.compute( + KEY_DOCS_AS_LONG, (key, val) -> val == null ? docsAsLong : (Long) val + docsAsLong); + } + } + + /** Validates that a field attribute value is identical across shards. */ + private void validateFieldAttr( + String fieldName, + String attrName, + Object currentVal, + Object expectedVal, + String currentShardAddr, + String expectedShardAddr) { + String currentStr = currentVal != null ? currentVal.toString() : null; + String expectedStr = expectedVal != null ? expectedVal.toString() : null; + if (!Objects.equals(currentStr, expectedStr)) { + throw new SolrException( + ErrorCode.SERVER_ERROR, + "Field '" + + fieldName + + "' has inconsistent '" + + attrName + + "' across shards: '" + + expectedStr + + "' (from " + + expectedShardAddr + + ") vs '" + + currentStr + + "' (from " + + currentShardAddr + + ")"); + } } /** @@ -329,8 +710,8 @@ private static SimpleOrderedMap getDocumentFieldsInfo( SchemaField sfield = schema.getFieldOrNull(field.name()); FieldType ftype = (sfield == null) ? null : sfield.getType(); - f.add("type", (ftype == null) ? null : ftype.getTypeName()); - f.add("schema", getFieldFlags(sfield)); + f.add(KEY_TYPE, (ftype == null) ? null : ftype.getTypeName()); + f.add(KEY_SCHEMA_FLAGS, getFieldFlags(sfield)); f.add("flags", getFieldFlags(field)); f.add("value", (ftype == null) ? null : ftype.toExternal(field)); @@ -417,12 +798,12 @@ private static SimpleOrderedMap getIndexedFieldsInfo(SolrQueryRequest re SchemaField sfield = schema.getFieldOrNull(fieldName); FieldType ftype = (sfield == null) ? null : sfield.getType(); - fieldMap.add("type", (ftype == null) ? null : ftype.getTypeName()); - fieldMap.add("schema", getFieldFlags(sfield)); + fieldMap.add(KEY_TYPE, (ftype == null) ? 
null : ftype.getTypeName()); + fieldMap.add(KEY_SCHEMA_FLAGS, getFieldFlags(sfield)); if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) { - fieldMap.add("dynamicBase", schema.getDynamicPattern(sfield.getName())); + fieldMap.add(KEY_DYNAMIC_BASE, schema.getDynamicPattern(sfield.getName())); } Terms terms = reader.terms(fieldName); // Not indexed, so we need to report what we can (it made it through the fl param if @@ -441,17 +822,16 @@ private static SimpleOrderedMap getIndexedFieldsInfo(SolrQueryRequest re try { IndexableField fld = doc.getField(fieldName); if (fld != null) { - fieldMap.add("index", getFieldFlags(fld)); + fieldMap.add(KEY_INDEX_FLAGS, getFieldFlags(fld)); } else { - // it is a non-stored field... - fieldMap.add("index", "(unstored field)"); + fieldMap.add(KEY_INDEX_FLAGS, "(unstored field)"); } } catch (Exception ex) { log.warn("error reading field: {}", fieldName); } } } - fieldMap.add("docs", terms.getDocCount()); + fieldMap.add(KEY_DOCS, terms.getDocCount()); } if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) { getDetailedFieldInfo(req, fieldName, fieldMap); @@ -472,7 +852,7 @@ private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws I StoredFields storedFields = reader.storedFields(); // Deal with the chance that the first bunch of terms are in deleted documents. Is there a // better way? - for (int idx = 0; idx < 1000 && postingsEnum == null; ++idx) { + for (int idx = 0; idx < 1000; ++idx) { text = termsEnum.next(); // Ran off the end of the terms enum without finding any live docs with that field in them. 
if (text == null) { @@ -481,7 +861,7 @@ private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws I postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); final Bits liveDocs = reader.getLiveDocs(); if (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - if (liveDocs != null && liveDocs.get(postingsEnum.docID())) { + if (liveDocs != null && !liveDocs.get(postingsEnum.docID())) { continue; } return storedFields.document(postingsEnum.docID()); @@ -728,13 +1108,13 @@ private static void getDetailedFieldInfo( } } tiq.histogram.add(buckets); - fieldMap.add("distinct", tiq.distinctTerms); + fieldMap.add(KEY_DISTINCT, tiq.distinctTerms); // Include top terms - fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema())); + fieldMap.add(KEY_TOP_TERMS, tiq.toNamedList(req.getSearcher().getSchema())); // Add a histogram - fieldMap.add("histogram", tiq.histogram.toNamedList()); + fieldMap.add(KEY_HISTOGRAM, tiq.histogram.toNamedList()); } private static List toListOfStrings(SchemaField[] raw) { diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java new file mode 100644 index 000000000000..b42f56d87acd --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -0,0 +1,719 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.admin; + +import static org.apache.solr.common.params.CommonParams.DISTRIB; + +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.request.SolrQuery; +import org.apache.solr.client.solrj.request.schema.SchemaRequest; +import org.apache.solr.client.solrj.response.InputStreamResponseParser; +import org.apache.solr.client.solrj.response.LukeResponse; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.embedded.JettySolrRunner; +import org.apache.solr.request.SolrQueryRequestBase; +import org.apache.solr.update.AddUpdateCommand; +import org.apache.solr.update.CommitUpdateCommand; +import org.apache.solr.util.BaseTestHarness; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; + +public class LukeRequestHandlerDistribTest extends SolrCloudTestCase { + + private static final String COLLECTION = "lukeDistribTest"; + private static final int NUM_DOCS = 20; + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(2) + .addConfig("conf", configset("cloud-dynamic")) + .addConfig("managed", configset("cloud-managed")) + .configure(); + + CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + cluster.waitForActiveCollection(COLLECTION, 2, 2); + + List docs = new ArrayList<>(); + for (int i = 0; i < NUM_DOCS; i++) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", String.valueOf(i)); + doc.addField("name", "name_" + i); + doc.addField("subject", "subject value " + (i % 5)); + docs.add(doc); + } + cluster.getSolrClient().add(COLLECTION, docs); + cluster.getSolrClient().commit(COLLECTION); + } + + @AfterClass + public static void afterClass() throws Exception { + shutdownCluster(); + } + + private LukeResponse requestLuke(String collection, ModifiableSolrParams extra) throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + if (extra != null) { + for (Map.Entry entry : extra.getMap().entrySet()) { + params.set(entry.getKey(), entry.getValue()); + } + } + QueryRequest req = new QueryRequest(params); + NamedList raw = cluster.getSolrClient().request(req, collection); + LukeResponse rsp = new LukeResponse(); + rsp.setResponse(raw); + return rsp; + } + + private void assertLukeXPath(String collection, ModifiableSolrParams extra, String... 
xpaths) + throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + params.set("wt", "xml"); + if (extra != null) { + for (Map.Entry entry : extra.getMap().entrySet()) { + params.set(entry.getKey(), entry.getValue()); + } + } + QueryRequest req = new QueryRequest(params); + req.setResponseParser(new InputStreamResponseParser("xml")); + NamedList raw = cluster.getSolrClient().request(req, collection); + String xml = InputStreamResponseParser.consumeResponseToString(raw); + String failedXpath = BaseTestHarness.validateXPath(xml, xpaths); + assertNull("XPath validation failed: " + failedXpath + "\nResponse:\n" + xml, failedXpath); + } + + @Test + public void testDistributedMerge() throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(DISTRIB, "true"); + + LukeResponse rsp = requestLuke(COLLECTION, params); + + assertEquals( + "merged numDocs should equal total docs", NUM_DOCS, rsp.getNumDocsAsLong().longValue()); + assertTrue("merged maxDoc should be > 0", rsp.getMaxDoc() > 0); + assertNotNull("deletedDocs should be present", rsp.getDeletedDocsAsLong()); + + Map shards = rsp.getShardResponses(); + assertNotNull("shards section should be present", shards); + assertEquals("should have 2 shard entries", 2, shards.size()); + + // Each shard should have its own index info; per-shard numDocs should sum to total + long sumShardDocs = 0; + for (Map.Entry entry : shards.entrySet()) { + LukeResponse shardLuke = entry.getValue(); + assertNotNull("each shard should have numDocs", shardLuke.getNumDocsAsLong()); + assertNotNull("each shard should have maxDoc", shardLuke.getMaxDoc()); + sumShardDocs += shardLuke.getNumDocsAsLong(); + } + assertEquals( + "sum of per-shard numDocs should equal merged numDocs", + rsp.getNumDocsAsLong().longValue(), + sumShardDocs); + } + + @Test + public void testDistributedFieldsMerge() throws Exception { + 
ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(DISTRIB, "true"); + + LukeResponse rsp = requestLuke(COLLECTION, params); + + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + + LukeResponse.FieldInfo nameField = fields.get("name"); + assertNotNull("'name' field should be present", nameField); + assertNotNull("field type should be present", nameField.getType()); + assertNotNull("schema flags should be present", nameField.getSchema()); + assertEquals( + "merged docs count for 'name' should equal total docs", + NUM_DOCS, + nameField.getDocsAsLong().longValue()); + + LukeResponse.FieldInfo idField = fields.get("id"); + assertNotNull("'id' field should be present", idField); + assertEquals("id field type should be string", "string", idField.getType()); + + // Validate merged field metadata matches schema and test data + assertLukeXPath( + COLLECTION, + params, + "//lst[@name='index']/long[@name='numDocs'][.='20']", + "count(//lst[@name='shards']/lst)=2", + "//lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='fields']/lst[@name='name']/str[@name='schema'][.='ITS---------------']", + "//lst[@name='fields']/lst[@name='name']/str[@name='index']", + "//lst[@name='fields']/lst[@name='name']/long[@name='docsAsLong'][.='20']", + "//lst[@name='fields']/lst[@name='id']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='id']/long[@name='docsAsLong'][.='20']"); + } + + @Test + public void testDetailedFieldStatsPerShard() throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(DISTRIB, "true"); + params.set("fl", "name"); + params.set("numTerms", "5"); + + LukeResponse rsp = requestLuke(COLLECTION, params); + + // Top-level fields should NOT have topTerms, distinct, histogram + LukeResponse.FieldInfo nameField = rsp.getFieldInfo().get("name"); + assertNotNull("'name' field should be present", nameField); + assertNull("topTerms 
should NOT be in top-level fields", nameField.getTopTerms()); + assertEquals("distinct should NOT be in top-level fields", 0, nameField.getDistinct()); + + // Per-shard entries should have detailed stats + Map shards = rsp.getShardResponses(); + assertNotNull("shards section should be present", shards); + + ModifiableSolrParams detailedParams = new ModifiableSolrParams(); + detailedParams.set(DISTRIB, "true"); + detailedParams.set("fl", "name"); + detailedParams.set("numTerms", "5"); + assertLukeXPath( + COLLECTION, + detailedParams, + // Top-level merged field should have type and merged doc count but no detailed stats + "/response/lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "/response/lst[@name='fields']/lst[@name='name']/long[@name='docsAsLong'][.='20']", + "not(/response/lst[@name='fields']/lst[@name='name']/lst[@name='topTerms'])", + "not(/response/lst[@name='fields']/lst[@name='name']/lst[@name='histogram'])", + "not(/response/lst[@name='fields']/lst[@name='name']/int[@name='distinct'])", + // Per-shard entries should have detailed stats; each name is unique so docFreq=1 + "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/lst[@name='topTerms']", + "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/lst[@name='histogram']/int[@name='1']", + "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/int[@name='distinct']"); + } + + @Test + public void testLocalModeDefault() throws Exception { + LukeResponse rsp = requestLuke(COLLECTION, null); + + assertNotNull("index info should be present", rsp.getIndexInfo()); + assertNull("shards should NOT be present in local mode", rsp.getShardResponses()); + } + + @Test + public void testExplicitDistribFalse() throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(DISTRIB, "false"); + + LukeResponse rsp = requestLuke(COLLECTION, params); + + assertNotNull("index info should be present", rsp.getIndexInfo()); + 
assertNull("shards should NOT be present with distrib=false", rsp.getShardResponses()); + } + + /** + * 12 shards, 1 document: only one shard has data, the other 11 are empty. Verifies that + * schema-derived attributes (type, schema flags, dynamicBase) merge correctly when most shards + * have no documents. + */ + @Test + public void testSparseShards() throws Exception { + String collection = "lukeSparse12"; + CollectionAdminRequest.createCollection(collection, "conf", 12, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + cluster.waitForActiveCollection(collection, 12, 12); + + try { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "only-one"); + doc.addField("name", "sparse test"); + doc.addField("subject", "subject value"); + doc.addField("cat_s", "category"); + cluster.getSolrClient().add(collection, doc); + cluster.getSolrClient().commit(collection); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(DISTRIB, "true"); + + LukeResponse rsp = requestLuke(collection, params); + + // Index-level stats + assertEquals("numDocs should be 1", 1, rsp.getNumDocsAsLong().longValue()); + assertTrue("maxDoc should be > 0", rsp.getMaxDoc() > 0); + assertEquals("deletedDocs should be 0", 0, rsp.getDeletedDocsAsLong().longValue()); + + Map shards = rsp.getShardResponses(); + assertNotNull("shards section should be present", shards); + assertEquals("should have 12 shard entries", 12, shards.size()); + + // Exactly one shard should have numDocs=1 + long sumShardDocs = 0; + for (Map.Entry entry : shards.entrySet()) { + LukeResponse shardLuke = entry.getValue(); + assertNotNull("each shard should have numDocs", shardLuke.getNumDocsAsLong()); + sumShardDocs += shardLuke.getNumDocsAsLong(); + } + assertEquals("sum of per-shard numDocs should be 1", 1, sumShardDocs); + + // Field-level checks + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + + // Schema-derived attrs should be 
present for all fields, even with 11 empty shards + LukeResponse.FieldInfo idField = fields.get("id"); + assertNotNull("'id' field should be present", idField); + assertEquals("id type", "string", idField.getType()); + assertNotNull("id schema flags", idField.getSchema()); + + LukeResponse.FieldInfo nameField = fields.get("name"); + assertNotNull("'name' field should be present", nameField); + assertNotNull("name type", nameField.getType()); + assertNotNull("name schema flags", nameField.getSchema()); + assertEquals("name docs should be 1", 1, nameField.getDocsAsLong().longValue()); + + // Dynamic field — should have dynamicBase in extras + LukeResponse.FieldInfo catField = fields.get("cat_s"); + assertNotNull("'cat_s' field should be present", catField); + assertNotNull("cat_s type", catField.getType()); + assertNotNull("cat_s dynamicBase", catField.getExtras().get("dynamicBase")); + + // Verify structural correctness of the merged response via XPath + ModifiableSolrParams xpathParams = new ModifiableSolrParams(); + xpathParams.set(DISTRIB, "true"); + assertLukeXPath( + collection, + xpathParams, + "//lst[@name='index']/long[@name='numDocs'][.='1']", + "//lst[@name='index']/long[@name='deletedDocs'][.='0']", + "count(//lst[@name='shards']/lst)=12", + "//lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='fields']/lst[@name='name']/str[@name='schema'][.='ITS---------------']", + "//lst[@name='fields']/lst[@name='name']/str[@name='index']", + "//lst[@name='fields']/lst[@name='name']/long[@name='docsAsLong'][.='1']", + "//lst[@name='fields']/lst[@name='cat_s']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='cat_s']/str[@name='dynamicBase'][.='*_s']", + "//lst[@name='fields']/lst[@name='cat_s']/long[@name='docsAsLong'][.='1']"); + } finally { + CollectionAdminRequest.deleteCollection(collection) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + } + } + + @Test + public void testDistribShowSchema() 
throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(DISTRIB, "true"); + params.set("show", "schema"); + + assertLukeXPath( + COLLECTION, + params, + "//lst[@name='schema']/lst[@name='fields']/lst[@name='id']/str[@name='type'][.='string']", + "//lst[@name='schema']/lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='schema']/lst[@name='dynamicFields']/lst[@name='*_s']", + "//lst[@name='schema']/str[@name='uniqueKeyField'][.='id']", + "//lst[@name='schema']/lst[@name='types']/lst[@name='string']", + "//lst[@name='schema']/lst[@name='types']/lst[@name='nametext']", + "//lst[@name='schema']/lst[@name='similarity']", + "not(/response/lst[@name='fields'])", + "count(//lst[@name='shards']/lst)=2"); + } + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + /** + * Verifies that distributed Luke detects inconsistent index flags across shards. Uses Schema API + * to change a field's {@code stored} property between indexing on different shards, producing + * different Lucene FieldInfo (and thus different index flags strings) on each shard. 
+   */
+  @Test
+  public void testInconsistentIndexFlagsAcrossShards() throws Exception {
+    String collection = "lukeInconsistentFlags";
+    // NOTE(review): presumably the "managed" configset reads this property to allow the Schema
+    // API edits below -- confirm against the configset.
+    System.setProperty("managed.schema.mutable", "true");
+    // Let a creation failure propagate. Swallowing it would only resurface later as an
+    // unrelated-looking timeout in waitForActiveCollection.
+    CollectionAdminRequest.createCollection(collection, "managed", 2, 1)
+        .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT);
+    cluster.waitForActiveCollection(collection, 2, 2);
+
+    try {
+      // Add a field with stored=true, indexed=true
+      Map<String, Object> fieldAttrs = new LinkedHashMap<>();
+      fieldAttrs.put("name", "test_flag_s");
+      fieldAttrs.put("type", "string");
+      fieldAttrs.put("stored", true);
+      fieldAttrs.put("indexed", true);
+      new SchemaRequest.AddField(fieldAttrs).process(cluster.getSolrClient(), collection);
+
+      // Index a target doc WITH the field, plus seed docs without it
+      SolrInputDocument targetDoc = new SolrInputDocument();
+      targetDoc.addField("id", "target");
+      targetDoc.addField("test_flag_s", "has_indexed");
+      cluster.getSolrClient().add(collection, targetDoc);
+
+      List<SolrInputDocument> seedDocs = new ArrayList<>();
+      for (int i = 0; i < 20; i++) {
+        SolrInputDocument doc = new SolrInputDocument();
+        doc.addField("id", "seed_" + i);
+        seedDocs.add(doc);
+      }
+      cluster.getSolrClient().add(collection, seedDocs);
+      cluster.getSolrClient().commit(collection);
+
+      // Find which shard has the target doc by querying each replica directly.
+      // Must use distrib=false — SolrCloud defaults distrib to true even on direct replica queries.
+      DocCollection docColl = getCollectionState(collection);
+      String targetSliceName = null;
+      for (Slice slice : docColl.getSlices()) {
+        Replica leader = slice.getLeader();
+        try (SolrClient client = getHttpSolrClient(leader)) {
+          SolrQuery q = new SolrQuery("id:target");
+          q.set(DISTRIB, "false");
+          QueryResponse qr = client.query(q);
+          if (qr.getResults().getNumFound() > 0) {
+            targetSliceName = slice.getName();
+            // The id is unique, so there is no need to query the remaining shards.
+            break;
+          }
+        }
+      }
+      assertNotNull("target doc should exist on a shard", targetSliceName);
+
+      // Find a seed doc on the other shard
+      String otherDocId = null;
+      for (Slice slice : docColl.getSlices()) {
+        if (!slice.getName().equals(targetSliceName)) {
+          Replica leader = slice.getLeader();
+          try (SolrClient client = getHttpSolrClient(leader)) {
+            SolrQuery q = new SolrQuery("*:*");
+            q.setRows(1);
+            q.set(DISTRIB, "false");
+            QueryResponse qr = client.query(q);
+            assertTrue("other shard should have seed docs", qr.getResults().getNumFound() > 0);
+            otherDocId = (String) qr.getResults().getFirst().getFieldValue("id");
+          }
+          break;
+        }
+      }
+      assertNotNull("should find a seed doc on the other shard", otherDocId);
+      // The outcome depends on document routing, so record which docs ended up where.
+      log.info("target doc is on {}; updating seed doc {} on the other shard", targetSliceName, otherDocId);
+
+      // Change the field to stored=false via Schema API
+      fieldAttrs.put("stored", false);
+      new SchemaRequest.ReplaceField(fieldAttrs).process(cluster.getSolrClient(), collection);
+
+      // Reload collection to pick up schema change
+      CollectionAdminRequest.reloadCollection(collection).process(cluster.getSolrClient());
+
+      // Update the other-shard doc to include the field (now unstored in the new segment)
+      SolrInputDocument updateDoc = new SolrInputDocument();
+      updateDoc.addField("id", otherDocId);
+      updateDoc.addField("test_flag_s", "not_indexed");
+      cluster.getSolrClient().add(collection, updateDoc);
+      cluster.getSolrClient().commit(collection);
+
+      // Distributed Luke should detect inconsistent index flags between the two shards.
+      // One shard has stored=true segments, the other has stored=false segments for test_flag_s.
+      ModifiableSolrParams params = new ModifiableSolrParams();
+      params.set(DISTRIB, "true");
+      params.set("fl", "test_flag_s");
+
+      Exception ex = expectThrows(Exception.class, () -> requestLuke(collection, params));
+      String fullMessage = SolrException.getRootCause(ex).getMessage();
+      assertTrue(
+          "exception chain should mention inconsistent index flags: " + fullMessage,
+          fullMessage.contains("inconsistent"));
+    } finally {
+      CollectionAdminRequest.deleteCollection(collection)
+          .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT);
+    }
+  }
+
+  /**
+   * Exercises the deferred index flags path: when the first shard to report a field has null index
+   * flags (all its live docs for that field were deleted, but the field persists in FieldInfos from
+   * unmerged segments), the merge should still populate index flags from a later shard that has
+   * live docs.
+   *
+   *

Setup: 16-shard collection. Each shard gets one doc with field "flag_target_s" (which is + * then deleted) plus an anchor doc without it (to keep the shard non-empty). Only one shard + * retains a live doc with "flag_target_s". With 16 shards, the probability that the one live + * shard is processed first is low enough. Either way, the merged response should have index flags + * for the field. + */ + @Test + public void testDeferredIndexFlags() throws Exception { + String collection = "lukeDeferredFlags"; + int numShards = 16; + CollectionAdminRequest.createCollection(collection, "conf", numShards, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + cluster.waitForActiveCollection(collection, numShards, numShards); + + try { + // Index one doc with the target field per shard, plus an anchor doc without it. + // The anchor doc keeps the shard non-empty after we delete the target doc. + // We use enough docs to spread across (hopefully) all shards. + List docs = new ArrayList<>(); + for (int i = 0; i < numShards * 4; i++) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "target_" + i); + doc.addField("flag_target_s", "value_" + i); + docs.add(doc); + + SolrInputDocument anchor = new SolrInputDocument(); + anchor.addField("id", "anchor_" + i); + anchor.addField("name", "anchor"); + docs.add(anchor); + } + cluster.getSolrClient().add(collection, docs); + cluster.getSolrClient().commit(collection); + + // Delete all target docs, leaving only anchors (which don't have flag_target_s) + // on most shards. Keep exactly one target doc alive. + for (int i = 1; i < numShards * 4; i++) { + cluster.getSolrClient().deleteById(collection, "target_" + i); + } + cluster.getSolrClient().commit(collection); + + // Verify: distributed Luke should have index flags for flag_target_s in the merged response, + // whether they came from the first shard (constructor path) or a later shard (deferred path). 
+ ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(DISTRIB, "true"); + params.set("fl", "flag_target_s"); + + LukeResponse rsp = requestLuke(collection, params); + + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + LukeResponse.FieldInfo targetField = fields.get("flag_target_s"); + assertNotNull("'flag_target_s' field should be present", targetField); + + // The merged response should have index flags from whichever shard had live docs + ModifiableSolrParams xpathParams = new ModifiableSolrParams(); + xpathParams.set(DISTRIB, "true"); + xpathParams.set("fl", "flag_target_s"); + assertLukeXPath( + collection, + xpathParams, + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='dynamicBase'][.='*_s']", + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='index']", + "//lst[@name='fields']/lst[@name='flag_target_s']/long[@name='docsAsLong'][.='1']"); + } finally { + CollectionAdminRequest.deleteCollection(collection) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + } + } + + /** + * Exercises shard error propagation through the distributed doc lookup path. Passing id=0 with + * show=schema triggers a BAD_REQUEST on the shard that has doc 0 (the local handler rejects an id + * combined with a non-DOC show style). The distributed handler should propagate this as a + * SolrException. 
+ */ + @Test + public void testDistributedShardError() { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(DISTRIB, "true"); + params.set("id", "0"); + params.set("show", "schema"); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(COLLECTION, params)); + String fullMessage = SolrException.getRootCause(ex).getMessage(); + assertTrue( + "exception should mention doc style mismatch: " + fullMessage, + fullMessage.contains("missing doc param for doc style")); + } + + /** Verifies that the docId parameter is rejected in distributed mode. */ + @Test + public void testDistributedDocIdRejected() { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(DISTRIB, "true"); + params.set("docId", "0"); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(COLLECTION, params)); + String fullMessage = SolrException.getRootCause(ex).getMessage(); + assertTrue( + "exception should mention docId not supported: " + fullMessage, + fullMessage.contains("docId parameter is not supported in distributed mode")); + } + + /** Verifies distributed doc lookup returns the document when it exists. 
*/ + @Test + public void testDistributedDocLookupFound() throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(DISTRIB, "true"); + params.set("id", "0"); + + assertLukeXPath( + COLLECTION, + params, + "//lst[@name='doc']/int[@name='docId']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='id']/str[@name='type'][.='string']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='id']/str[@name='value'][.='0']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='name']/str[@name='value'][.='name_0']", + "//lst[@name='doc']/arr[@name='solr']/str[.='0']", + "//lst[@name='doc']/arr[@name='solr']/str[.='name_0']", + "//lst[@name='index']", + "//lst[@name='info']"); + } + + /** Verifies distributed doc lookup returns an empty response for a non-existent ID. */ + @Test + public void testDistributedDocLookupNotFound() throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(DISTRIB, "true"); + params.set("id", "this_id_does_not_exist_anywhere"); + + LukeResponse rsp = requestLuke(COLLECTION, params); + + NamedList raw = rsp.getResponse(); + assertNull("doc section should NOT be present for missing ID", raw.get("doc")); + + assertLukeXPath(COLLECTION, params, "not(//lst[@name='doc'])"); + } + + /** + * Verifies that distributed doc lookup detects a corrupt index where the same unique key exists + * on multiple shards. 
+   */
+  @Test
+  public void testDistributedDocLookupDuplicateId() throws Exception {
+    String collection = "lukeDupId";
+    int numShards = 2;
+    CollectionAdminRequest.createCollection(collection, "conf", numShards, 1)
+        .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT);
+    cluster.waitForActiveCollection(collection, numShards, numShards);
+
+    try {
+      String dupId = "duplicate_doc";
+
+      // Write the same document directly to two shard cores via UpdateHandler,
+      // completely bypassing the distributed update processor chain.
+      DocCollection docColl = getCollectionState(collection);
+      List<Slice> slices = new ArrayList<>(docColl.getActiveSlices());
+      assertTrue("need at least 2 shards", slices.size() >= 2);
+
+      for (int i = 0; i < 2; i++) {
+        Replica leader = slices.get(i).getLeader();
+        JettySolrRunner jetty =
+            cluster.getJettySolrRunners().stream()
+                .filter(j -> j.getNodeName().equals(leader.getNodeName()))
+                .findFirst()
+                .orElse(null);
+        assertNotNull("should find jetty for replica", jetty);
+
+        // The requests are closeable resources. Declaring them after the core means they are
+        // closed before the core reference is released.
+        try (SolrCore core = jetty.getCoreContainer().getCore(leader.getCoreName());
+            SolrQueryRequestBase addReq =
+                new SolrQueryRequestBase(core, new ModifiableSolrParams()) {};
+            SolrQueryRequestBase commitReq =
+                new SolrQueryRequestBase(core, new ModifiableSolrParams()) {}) {
+          SolrInputDocument solrDoc = new SolrInputDocument();
+          solrDoc.addField("id", dupId);
+          solrDoc.addField("name", "dup_copy_" + i);
+
+          AddUpdateCommand addCmd = new AddUpdateCommand(addReq);
+          addCmd.solrDoc = solrDoc;
+          core.getUpdateHandler().addDoc(addCmd);
+
+          CommitUpdateCommand commitCmd = new CommitUpdateCommand(commitReq, false);
+          commitCmd.waitSearcher = true;
+          core.getUpdateHandler().commit(commitCmd);
+        }
+      }
+
+      // Verify the duplicate actually exists on both shards
+      int shardsWithDoc = 0;
+      for (Slice slice : docColl.getActiveSlices()) {
+        Replica leader = slice.getLeader();
+        try (SolrClient client = getHttpSolrClient(leader)) {
+          SolrQuery q = new SolrQuery("id:" + dupId);
+          q.set(DISTRIB, "false");
+          QueryResponse qr = client.query(q);
+          if (qr.getResults().getNumFound() > 0) {
+            shardsWithDoc++;
+          }
+        }
+      }
+      assertEquals("duplicate doc should exist on exactly 2 shards", 2, shardsWithDoc);
+
+      // Distributed Luke doc lookup should detect the corruption
+      ModifiableSolrParams params = new ModifiableSolrParams();
+      params.set(DISTRIB, "true");
+      params.set("id", dupId);
+
+      Exception ex = expectThrows(Exception.class, () -> requestLuke(collection, params));
+      String fullMessage = SolrException.getRootCause(ex).getMessage();
+      assertTrue(
+          "exception should mention duplicate/corrupt index: " + fullMessage,
+          fullMessage.contains("found on multiple shards"));
+    } finally {
+      CollectionAdminRequest.deleteCollection(collection)
+          .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT);
+    }
+  }
+
+  @Test
+  public void testDistribTrueOnSingleShardFallsBackToLocal() throws Exception {
+    String singleShardCollection = "lukeSingleShard";
+    CollectionAdminRequest.createCollection(singleShardCollection, "conf", 1, 1)
+        .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT);
+    cluster.waitForActiveCollection(singleShardCollection, 1, 1);
+
+    try {
+      SolrInputDocument doc = new SolrInputDocument();
+      doc.addField("id", "single-1");
+      doc.addField("name", "test_name");
+      cluster.getSolrClient().add(singleShardCollection, doc);
+      cluster.getSolrClient().commit(singleShardCollection);
+
+      ModifiableSolrParams params = new ModifiableSolrParams();
+      params.set(DISTRIB, "true");
+
+      LukeResponse rsp = requestLuke(singleShardCollection, params);
+
+      assertNotNull(
+          "index info should be present even with distrib=true on single shard",
+          rsp.getIndexInfo());
+      assertEquals("should see the 1 doc we indexed", 1, rsp.getNumDocsAsLong().longValue());
+      assertNull(
+          "shards should NOT be present when falling back to local", rsp.getShardResponses());
+    } finally {
+      CollectionAdminRequest.deleteCollection(singleShardCollection)
+          .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT);
+    }
+  }
+}
diff --git
a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc index fb795f62cc17..2186aeec5b78 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc @@ -83,6 +83,18 @@ The number of top terms for each field. Choose whether `/luke` should return the index-flags for each field. Fetching and returning the index-flags for each field in the index has non-zero cost, and can slow down requests to `/luke`. +`distrib`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `false` +|=== ++ +When set to `true` in SolrCloud mode, the handler aggregates results from all shards in the collection. +Additive index metrics (`numDocs`, `deletedDocs`, `segmentCount`) are summed across shards; `maxDoc` is the maximum across shards. +Field types and schema flags are validated for consistency across shards. +Per-shard index details and per-field detailed statistics are returned under a `shards` key. + == LukeRequestHandler Examples All of the examples in this section assume you are running the "techproducts" Solr example: @@ -118,3 +130,41 @@ Alternatively, to work through the Lucene native id: http://localhost:8983/solr/techproducts/admin/luke?fl=manu&docId=0 From SolrJ, you can access /luke using the {solr-javadocs}/solrj/org/apache/solr/client/solrj/request/LukeRequest.html[`LukeRequest`] object. + +== Distributed Mode (SolrCloud) + +When running in SolrCloud, the Luke handler can aggregate results from all shards in a collection by setting `distrib=true`. +By default, `distrib` is `false` and the handler inspects only the local shard's index. 
+ +To get a collection-wide view: + +[source,text] +http://localhost:8983/solr/techproducts/admin/luke?distrib=true&numTerms=0 + +To get detailed field statistics across all shards for a specific field: + +[source,text] +http://localhost:8983/solr/techproducts/admin/luke?distrib=true&fl=manu + +=== Response Structure + +In distributed mode, the response contains: + +* `index` -- Merged metrics across all shards: `numDocs`, `deletedDocs`, `segmentCount` are summed; `maxDoc` is the maximum across shards. +* `fields` -- Merged field metadata. For each field: `type`, `schema` flags, and `dynamicBase` are validated to be consistent across shards; `index` flags are taken from the first shard that reports them and must match on every other shard that reports them. The per-field document count is summed and returned as `docsAsLong`. Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are _not_ included at this level. +* `schema` -- Schema information from the first responding shard (identical across shards sharing the same configset). +* `info` -- Static info from the first responding shard. +* `shards` -- Per-shard details in response-completion order. Each entry contains: +** `index` -- Full index info for that shard (including `directory`, `segmentsFile`, `version`, `current`, `hasDeletions`, `lastModified`, `userData`). +** `fields` -- Only present when `fl` triggers detailed statistics. Contains per-field `topTerms`, `distinct`, and `histogram` from that shard. + +=== Merge Semantics + +Field `type`, `schema` flags, and `dynamicBase` are validated for consistency across shards. +If a mismatch is detected, the handler returns an error identifying the field, the conflicting values, and the shard addresses involved. +The `index` flags are index-derived (not schema-derived), so a shard with no live documents for a field reports none; the first non-null value is used, and an error is also returned if another shard reports different `index` flags for the same field. + +Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are not merged across shards. +These statistics are shard-local and appear in each shard's entry under the `shards` key. 
+For cross-shard term-level aggregation, Solr's xref:query-guide:faceting.adoc[faceting API] provides refinement and distributed aggregation, though it differs from Luke's term statistics: faceting ignores deleted documents, is more flexible, and is generally more expensive. +A case could be made for adding merge strategies for Luke-powered term statistics (which offer better performance at the cost of flexibility), however this would require community interest to motivate. diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index c38a2caf8300..66faa1ac7d3b 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -21,6 +21,7 @@ import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.solr.common.luke.FieldFlag; @@ -116,9 +117,11 @@ public static class FieldInfo implements Serializable { String schema; int docs; int distinct; + Long docsAsLong; EnumSet flags; boolean cacheableFaceting; NamedList topTerms; + Map extras = new HashMap<>(); public FieldInfo(String n) { name = n; @@ -129,19 +132,24 @@ public void read(NamedList nl) { for (Map.Entry entry : nl) { if ("type".equals(entry.getKey())) { type = (String) entry.getValue(); - } - if ("flags".equals(entry.getKey())) { + } else if ("flags".equals(entry.getKey())) { flags = parseFlags((String) entry.getValue()); } else if ("schema".equals(entry.getKey())) { schema = (String) entry.getValue(); } else if ("docs".equals(entry.getKey())) { docs = (Integer) entry.getValue(); + docsAsLong = (long) docs; // widen, lossless + } else if ("docsAsLong".equals(entry.getKey())) { + // Don't set docs — narrowing Long→int is lossy + docsAsLong = (Long) entry.getValue(); } else if 
("distinct".equals(entry.getKey())) { distinct = (Integer) entry.getValue(); } else if ("cacheableFaceting".equals(entry.getKey())) { cacheableFaceting = (Boolean) entry.getValue(); } else if ("topTerms".equals(entry.getKey())) { topTerms = (NamedList) entry.getValue(); + } else { + extras.put(entry.getKey(), entry.getValue()); } } } @@ -178,6 +186,10 @@ public int getDocs() { return docs; } + public Long getDocsAsLong() { + return docsAsLong; + } + public String getName() { return name; } @@ -193,12 +205,17 @@ public EnumSet getSchemaFlags() { public NamedList getTopTerms() { return topTerms; } + + public Map getExtras() { + return extras; + } } private NamedList indexInfo; private Map fieldInfo; private Map dynamicFieldInfo; private Map fieldTypeInfo; + private Map shardResponses; @Override @SuppressWarnings("unchecked") @@ -247,6 +264,17 @@ public void setResponse(NamedList res) { } } } + + // Parse shards section (present in distributed responses) + NamedList shardsNL = (NamedList) res.get("shards"); + if (shardsNL != null) { + shardResponses = new LinkedHashMap<>(); + for (Map.Entry entry : shardsNL) { + LukeResponse shardRsp = new LukeResponse(); + shardRsp.setResponse((NamedList) entry.getValue()); + shardResponses.put(entry.getKey(), shardRsp); + } + } } // ---------------------------------------------------------------- @@ -257,19 +285,42 @@ public String getIndexDirectory() { return (String) indexInfo.get("directory"); } - public Integer getNumDocs() { + private Long getIndexLong(String key) { if (indexInfo == null) return null; - return (Integer) indexInfo.get("numDocs"); + Number n = (Number) indexInfo.get(key); + return n != null ? n.longValue() : null; + } + + public Integer getNumDocs() { + if (indexInfo == null) { + return null; + } + Object val = indexInfo.get("numDocs"); + return val instanceof Integer i ? 
i : null; + } + + public Long getNumDocsAsLong() { + return getIndexLong("numDocs"); } public Integer getMaxDoc() { - if (indexInfo == null) return null; - return (Integer) indexInfo.get("maxDoc"); + if (indexInfo == null) { + return null; + } + Object val = indexInfo.get("maxDoc"); + return val instanceof Integer i ? i : null; + } + + public Long getDeletedDocsAsLong() { + return getIndexLong("deletedDocs"); } public Integer getNumTerms() { - if (indexInfo == null) return null; - return (Integer) indexInfo.get("numTerms"); + if (indexInfo == null) { + return null; + } + Object val = indexInfo.get("numTerms"); + return val instanceof Integer i ? i : null; } public Map getFieldTypeInfo() { @@ -300,5 +351,9 @@ public FieldInfo getDynamicFieldInfo(String f) { return dynamicFieldInfo.get(f); } + public Map getShardResponses() { + return shardResponses; + } + // ---------------------------------------------------------------- }