diff --git a/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessor.java
new file mode 100644
index 00000000000..c17dada5eb1
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessor.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.Base64;
+import java.util.Collection;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.function.Predicate;
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.component.RealTimeGetComponent;
+import org.apache.solr.handler.component.RealTimeGetComponent.Resolution;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.TextField;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.UpdateCommand;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An implementation of {@link UpdateRequestProcessor} which computes a hash of field values, and
+ * uses this hash to reject/accept document updates.
+ *
+ *
+ * - When no corresponding document with same id exists (create), the computed hash is added to
+ * the document.
+ *
+ * - When a previous document exists (update), a new hash is computed from the incoming field
+ * values and compared with the stored hash.
+ *
+ *
+ * Depending on {@code dropSameDocuments} value, this processor may drop or accept document updates.
+ * This implementation can be used for monitoring or dropping no-op updates (updates that do not
+ * change the Solr document content).
+ *
+ *
+ * Note: the hash is computed using {@link Lookup3Signature} and must be stored in a field with
+ * docValues enabled for retrieval.
+ *
+ * @see Lookup3Signature
+ */
+public class ContentHashVersionProcessor extends UpdateRequestProcessor {
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+ private final SchemaField hashField;
+ private final SolrQueryResponse rsp;
+ private final SolrCore core;
+ private final Predicate<String> includedFields; // Matcher for included fields in hash
+ private final Predicate<String> excludedFields; // Matcher for excluded fields from hash
+ private boolean dropSameDocuments;
+ private int sameCount = 0;
+ private int differentCount = 0;
+
+ public ContentHashVersionProcessor(
+ Predicate<String> hashIncludedFields,
+ Predicate<String> hashExcludedFields,
+ String hashFieldName,
+ SolrQueryRequest req,
+ SolrQueryResponse rsp,
+ UpdateRequestProcessor next) {
+ super(next);
+ this.core = req.getCore();
+ this.hashField = new SchemaField(hashFieldName, new TextField());
+ this.rsp = rsp;
+ this.includedFields = hashIncludedFields;
+ this.excludedFields = hashExcludedFields;
+ }
+
+ @Override
+ public void processAdd(AddUpdateCommand cmd) throws IOException {
+ SolrInputDocument newDoc = cmd.getSolrInputDocument();
+ String newHash = computeDocHash(newDoc);
+ newDoc.setField(hashField.getName(), newHash);
+ int i = 0;
+
+ if (!isHashAcceptable(cmd.getIndexedId(), newHash)) {
+ return;
+ }
+
+ while (true) {
+ logOverlyFailedRetries(i, cmd);
+ try {
+ super.processAdd(cmd);
+ return;
+ } catch (SolrException e) {
+ if (e.code() != 409) {
+ throw e;
+ }
+ ++i;
+ }
+ }
+ }
+
+ @Override
+ public void finish() throws IOException {
+ try {
+ super.finish();
+ } finally {
+ // Only log when there are updates to existing documents
+ int totalUpdates = sameCount + differentCount;
+ if (totalUpdates > 0) {
+ if (dropSameDocuments) {
+ rsp.addToLog("contentHash.duplicatesDropped", sameCount);
+ rsp.addToLog("contentHash.duplicatesDetected", sameCount);
+ } else {
+ rsp.addToLog("contentHash.duplicatesDropped", 0);
+ rsp.addToLog("contentHash.duplicatesDetected", sameCount);
+ }
+ rsp.addToLog("contentHash.changed", differentCount);
+ } else {
+ rsp.addToLog("contentHash.duplicatesDropped", 0);
+ rsp.addToLog("contentHash.duplicatesDetected", 0);
+ rsp.addToLog("contentHash.changed", 0);
+ }
+ }
+ }
+
+ private static void logOverlyFailedRetries(int i, UpdateCommand cmd) {
+ if ((i & 255) == 255) {
+ log.warn("Unusual number of optimistic concurrency retries: retries={} cmd={}", i, cmd);
+ }
+ }
+
+ void setDropSameDocuments(boolean dropSameDocuments) {
+ this.dropSameDocuments = dropSameDocuments;
+ }
+
+ private boolean isHashAcceptable(BytesRef indexedDocId, String newHash) throws IOException {
+ assert null != indexedDocId;
+
+ Optional<String> oldDocHash = getOldDocHash(indexedDocId);
+ if (oldDocHash.isPresent()) {
+ String oldHash = oldDocHash.get(); // No hash: might want to keep track of these too
+ if (Objects.equals(newHash, oldHash)) {
+ sameCount++;
+ return !dropSameDocuments;
+ } else {
+ differentCount++;
+ return true;
+ }
+ }
+ return true; // Doc not found
+ }
+
+ /** Retrieves the hash value from the old document identified by the given ID. */
+ private Optional<String> getOldDocHash(BytesRef indexedDocId) throws IOException {
+ SolrInputDocument oldDoc =
+ RealTimeGetComponent.getInputDocument(
+ core, indexedDocId, indexedDocId, null, Set.of(hashField.getName()), Resolution.DOC);
+ if (oldDoc == null) {
+ return Optional.empty();
+ }
+ Object o = oldDoc.getFieldValue(hashField.getName());
+ return Optional.ofNullable(o).map(String::valueOf);
+ }
+
+ String computeDocHash(SolrInputDocument doc) {
+ final Signature sig = new Lookup3Signature();
+
+ // Stream field names, filter, sort, and process in a single pass
+ doc.getFieldNames().stream()
+ .filter(includedFields) // Keep fields that match 'included fields' matcher
+ .filter(excludedFields.negate()) // Exclude fields that match 'excluded fields' matcher
+ .sorted() // Sort to ensure consistent field order across different doc field orders
+ .forEach(
+ fieldName -> {
+ sig.add(fieldName);
+ Object o = doc.getFieldValue(fieldName);
+ if (o instanceof Collection) {
+ for (Object oo : (Collection<?>) o) {
+ sig.add(String.valueOf(oo));
+ }
+ } else {
+ sig.add(String.valueOf(o));
+ }
+ });
+
+ // Signature, depending on implementation, may return 8-byte or 16-byte value
+ byte[] signature = sig.getSignature();
+ return Base64.getEncoder().encodeToString(signature);
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessorFactory.java
new file mode 100644
index 00000000000..bdff30f062d
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessorFactory.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.util.plugin.SolrCoreAware;
+
+/** Factory for {@link ContentHashVersionProcessor} instances. */
+public class ContentHashVersionProcessorFactory extends UpdateRequestProcessorFactory
+ implements SolrCoreAware, UpdateRequestProcessorFactory.RunAlways {
+ private static final char SEPARATOR = ','; // Separator for included/excluded fields
+ private List<String> includeFields = List.of("*"); // Included fields defaults to 'all'
+ private List<String> excludeFields = new ArrayList<>();
+ private String hashFieldName; // Must be explicitly configured
+ private boolean dropSameDocuments = true;
+
+ public ContentHashVersionProcessorFactory() {}
+
+ public void init(NamedList<?> args) {
+ Object tmp = args.remove("includeFields");
+ if (tmp != null) {
+ if (!(tmp instanceof String)) {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR, "'includeFields' must be configured as a <str>");
+ }
+ // Include fields support comma separated list of fields (e.g. "field1,field2,field3").
+ // Also supports "*" to include all fields
+ this.includeFields =
+ StrUtils.splitSmart((String) tmp, SEPARATOR).stream()
+ .map(String::trim)
+ .collect(Collectors.toList());
+ }
+ tmp = args.remove("hashFieldName");
+ if (tmp == null) {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR,
+ "'hashFieldName' is required and must be explicitly configured");
+ }
+ if (!(tmp instanceof String)) {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR, "'hashFieldName' must be configured as a <str>");
+ }
+ this.hashFieldName = (String) tmp;
+
+ tmp = args.remove("excludeFields");
+ if (tmp != null) {
+ if (!(tmp instanceof String)) {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR, "'excludeFields' must be configured as a <str>");
+ }
+ if ("*".equals(((String) tmp).trim())) {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR, "'excludeFields' can't exclude all fields.");
+ }
+ // Exclude fields support comma separated list of fields (e.g.
+ // "excluded_field1,excluded_field2").
+ // Also supports "*" to exclude all fields
+ this.excludeFields =
+ StrUtils.splitSmart((String) tmp, SEPARATOR).stream()
+ .map(String::trim)
+ .collect(Collectors.toList());
+ }
+ excludeFields.add(hashFieldName); // Hash field name is excluded from hash computation
+
+ tmp = args.remove("hashCompareStrategy");
+ if (tmp != null) {
+ if (!(tmp instanceof String)) {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR,
+ "'hashCompareStrategy' must be configured as a <str>");
+ }
+ String value = ((String) tmp).toLowerCase(Locale.ROOT);
+ if ("drop".equalsIgnoreCase(value)) {
+ dropSameDocuments = true;
+ } else if ("log".equalsIgnoreCase(value)) {
+ dropSameDocuments = false;
+ } else {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR,
+ "Value '"
+ + value
+ + "' is unsupported for 'hashCompareStrategy', only 'drop' and 'log' are supported.");
+ }
+ }
+
+ super.init(args);
+ }
+
+ public UpdateRequestProcessor getInstance(
+ SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
+ ContentHashVersionProcessor processor =
+ new ContentHashVersionProcessor(
+ buildFieldMatcher(includeFields),
+ buildFieldMatcher(excludeFields),
+ hashFieldName,
+ req,
+ rsp,
+ next);
+ processor.setDropSameDocuments(dropSameDocuments);
+ return processor;
+ }
+
+ public void inform(SolrCore core) {
+ if (core.getLatestSchema().getUniqueKeyField() == null) {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR, "schema must have uniqueKey defined.");
+ }
+ }
+
+ public String getHashFieldName() {
+ return hashFieldName;
+ }
+
+ public List<String> getIncludeFields() {
+ return includeFields;
+ }
+
+ public List<String> getExcludeFields() {
+ return excludeFields;
+ }
+
+ public boolean dropSameDocuments() {
+ return dropSameDocuments;
+ }
+
+ static Predicate<String> buildFieldMatcher(List<String> fieldNames) {
+ return fieldName -> {
+ for (String currentFieldName : fieldNames) {
+ if ("*".equals(currentFieldName)) {
+ return true;
+ }
+ if (fieldName.equals(currentFieldName)) {
+ return true;
+ }
+ if (currentFieldName.length() > 1
+ && currentFieldName.endsWith("*")
+ && fieldName.startsWith(currentFieldName.substring(0, currentFieldName.length() - 1))) {
+ return true;
+ }
+ }
+ return false;
+ };
+ }
+}
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema16.xml b/solr/core/src/test-files/solr/collection1/conf/schema16.xml
new file mode 100644
index 00000000000..3cfdcc119fe
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/schema16.xml
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <uniqueKey>_id</uniqueKey>
+
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-contenthashversion.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-contenthashversion.xml
new file mode 100644
index 00000000000..5afe4b167cf
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-contenthashversion.xml
@@ -0,0 +1,66 @@
+
+
+
+
+
+
+ <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+
+
+
+
+ <dataDir>${solr.data.dir:}</dataDir>
+
+
+
+
+
+ <str name="dir">${solr.ulog.dir:}</str>
+
+
+
+
+
+
+ <str name="hashFieldName">_hash_</str>
+ <str name="excludeFields">_id</str>
+
+
+
+
+
+
+ <str name="hashFieldName">_hash_</str>
+ <str name="excludeFields">_id</str>
+ <str name="hashCompareStrategy">log</str>
+
+
+
+
+
+
+ <str name="hashFieldName">_hash_</str>
+ <str name="excludeFields">_id</str>
+ <str name="hashCompareStrategy">drop</str>
+
+
+
+
+
+
+
diff --git a/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorFactoryTest.java b/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorFactoryTest.java
new file mode 100644
index 00000000000..807a1987264
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorFactoryTest.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.update.processor;
+
+import static org.apache.solr.SolrTestCaseJ4.assumeWorkingMockito;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class ContentHashVersionProcessorFactoryTest {
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ assumeWorkingMockito();
+ }
+
+ @Test
+ public void shouldHaveSensibleDefaultValues() {
+ ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+ assertEquals(List.of("*"), factory.getIncludeFields());
+ assertTrue(factory.dropSameDocuments());
+ }
+
+ @Test
+ public void shouldInitWithHashFieldName() {
+ ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+ NamedList<String> args = new NamedList<>();
+ args.add("hashFieldName", "_hash_field_");
+ factory.init(args);
+
+ assertEquals("_hash_field_", factory.getHashFieldName());
+ }
+
+ @Test
+ public void shouldInitWithAllField() {
+ ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+ NamedList<String> args = new NamedList<>();
+ args.add("hashFieldName", "content_hash");
+ args.add("includeFields", "*");
+ factory.init(args);
+
+ assertEquals(1, factory.getIncludeFields().size());
+ assertEquals("*", factory.getIncludeFields().getFirst());
+ }
+
+ @Test
+ public void shouldInitWithIncludedFields() {
+ ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+ NamedList<String> args = new NamedList<>();
+ args.add("hashFieldName", "content_hash");
+ args.add("includeFields", " field1,field2 , field3 ");
+ factory.init(args);
+
+ assertEquals(3, factory.getIncludeFields().size());
+ assertEquals(List.of("field1", "field2", "field3"), factory.getIncludeFields());
+ }
+
+ @Test
+ public void shouldInitWithExcludedFields() {
+ ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+ NamedList<String> args = new NamedList<>();
+ args.add("hashFieldName", "content_hash");
+ args.add("excludeFields", " field1,field2 , field3 ");
+ factory.init(args);
+
+ assertEquals(4, factory.getExcludeFields().size());
+ assertEquals(List.of("field1", "field2", "field3", "content_hash"), factory.getExcludeFields());
+ }
+
+ @Test
+ public void shouldSelectDropStrategy() {
+ ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+ NamedList<String> args = new NamedList<>();
+ args.add("hashFieldName", "content_hash");
+ args.add("hashCompareStrategy", "drop");
+ factory.init(args);
+
+ assertTrue(factory.dropSameDocuments());
+ }
+
+ @Test
+ public void shouldSelectLogStrategy() {
+ ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+ NamedList<String> args = new NamedList<>();
+ args.add("hashFieldName", "content_hash");
+ args.add("hashCompareStrategy", "log");
+ factory.init(args);
+
+ assertFalse(factory.dropSameDocuments());
+ }
+
+ @Test(expected = SolrException.class)
+ public void shouldSelectUnsupportedStrategy() {
+ ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+ NamedList<String> args = new NamedList<>();
+ args.add("hashFieldName", "content_hash");
+ args.add("hashCompareStrategy", "unsupported value");
+ factory.init(args);
+ }
+
+ @Test(expected = SolrException.class)
+ public void shouldRejectExcludeAllFields() {
+ ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+ NamedList<String> args = new NamedList<>();
+ args.add("hashFieldName", "content_hash");
+ args.add("excludeFields", "*");
+ factory.init(args);
+ }
+
+ @Test(expected = SolrException.class)
+ public void shouldRequireExplicitHashFieldName() {
+ ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+ NamedList<String> args = new NamedList<>();
+ // Intentionally not setting hashFieldName
+ factory.init(args);
+ }
+
+ @Test
+ public void shouldAutoExcludeHashFieldFromHashComputation() {
+ ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+ NamedList<String> args = new NamedList<>();
+ args.add("hashFieldName", "my_hash_field");
+ args.add("excludeFields", "field1,field2");
+ factory.init(args);
+
+ // Hash field should be automatically added to excludeFields
+ assertEquals(3, factory.getExcludeFields().size());
+ assertTrue(
+ "Should contain explicitly excluded field1", factory.getExcludeFields().contains("field1"));
+ assertTrue(
+ "Should contain explicitly excluded field2", factory.getExcludeFields().contains("field2"));
+ assertTrue(
+ "Should auto-exclude hash field name",
+ factory.getExcludeFields().contains("my_hash_field"));
+ }
+}
diff --git a/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorTest.java b/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorTest.java
new file mode 100644
index 00000000000..6b9c908cbfe
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorTest.java
@@ -0,0 +1,433 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.update.processor;
+
+import static org.mockito.Mockito.mock;
+
+import java.util.List;
+import java.util.UUID;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.jspecify.annotations.NonNull;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class ContentHashVersionProcessorTest extends UpdateProcessorTestBase {
+
+ public static final String ID_FIELD = "_id";
+ public static final String FIRST_FIELD = "field1";
+ public static final String SECOND_FIELD = "field2";
+ public static final String THIRD_FIELD = "docField3";
+ public static final String FOURTH_FIELD = "field4";
+
+ public static final String INITIAL_DOC_ID = "1";
+ public static final String INITIAL_FIELD1_VALUE = "Initial values used to compute initial hash";
+ public static final String INITIAL_FIELD2_VALUE =
+ "This a constant value for testing include/exclude fields";
+ public static final String[] INITIAL_DOC =
+ new String[] {
+ ID_FIELD, INITIAL_DOC_ID,
+ FIRST_FIELD, INITIAL_FIELD1_VALUE,
+ SECOND_FIELD, INITIAL_FIELD2_VALUE
+ };
+ private String initialDocHash;
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ initCore("solrconfig-contenthashversion.xml", "schema16.xml");
+ }
+
+ @Before
+ public void setUp() throws Exception {
+ super.setUp();
+ assertU(delQ("*:*"));
+ addDoc(adoc(INITIAL_DOC), "contenthashversion-default");
+ assertU(commit());
+
+ // Query for the document and extract _hash_ field value
+ initialDocHash = getHashFieldValue(INITIAL_DOC_ID);
+ }
+
+ private static @NonNull String getHashFieldValue(String docId) throws Exception {
+ String response = h.query(req("q", ID_FIELD + ":" + docId, "fl", "_hash_"));
+
+ // Parse XML response to extract _hash_ field value
+ // Response format: <str name="_hash_">value</str>
+ String hashPattern = "<str name=\"_hash_\">";
+ int startIdx = response.indexOf(hashPattern);
+ if (startIdx == -1) {
+ fail("Hash field not found in document " + docId);
+ }
+ startIdx += hashPattern.length();
+ int endIdx = response.indexOf("</str>", startIdx);
+ if (endIdx == -1) {
+ fail("Hash field closing tag not found");
+ }
+ return response.substring(startIdx, endIdx);
+ }
+
+ private ContentHashVersionProcessor getContentHashVersionProcessor(
+ List<String> includedFields, List<String> excludedFields) {
+ return new ContentHashVersionProcessor(
+ ContentHashVersionProcessorFactory.buildFieldMatcher(includedFields),
+ ContentHashVersionProcessorFactory.buildFieldMatcher(excludedFields),
+ "_hash_",
+ mock(SolrQueryRequest.class),
+ mock(SolrQueryResponse.class),
+ mock(UpdateRequestProcessor.class));
+ }
+
+ @Test
+ public void shouldUseExcludedFieldsWildcard() {
+ // Given
+ ContentHashVersionProcessor processor =
+ getContentHashVersionProcessor(List.of("*"), List.of("field*"));
+
+ // Given (doc for update)
+ SolrInputDocument inputDocument =
+ doc(
+ f(ID_FIELD, "0000000001"),
+ f(FIRST_FIELD, UUID.randomUUID().toString()),
+ f(SECOND_FIELD, UUID.randomUUID().toString()),
+ f(THIRD_FIELD, "constant to have a constant hash"),
+ f(FOURTH_FIELD, UUID.randomUUID().toString()));
+
+ // Then (only ID and THIRD_FIELD is used in hash, other fields contain random values)
+ assertEquals(
+ "bwE8Zjq0aOs=", processor.computeDocHash(inputDocument)); // Hash if only ID field was used
+ }
+
+ @Test
+ public void shouldUseIncludedFieldsWildcard() {
+ // Given
+ ContentHashVersionProcessor processor =
+ getContentHashVersionProcessor(List.of("field*"), List.of(THIRD_FIELD));
+
+ // Given (doc for update)
+ SolrInputDocument inputDocument =
+ doc(
+ f(ID_FIELD, "0000000001"),
+ f(FIRST_FIELD, "constant to have a constant hash for field1"),
+ f(SECOND_FIELD, "constant to have a constant hash for field2"),
+ f(THIRD_FIELD, UUID.randomUUID().toString()),
+ f(FOURTH_FIELD, "constant to have a constant hash for field4"));
+
+ // Then
+ assertEquals("PozPs2qZQtw=", processor.computeDocHash(inputDocument));
+ }
+
+ @Test
+ public void shouldUseIncludedFieldsWildcard2() {
+ // Given (variant of previous shouldUseIncludedFieldsWildcard, without the excludedField config)
+ ContentHashVersionProcessor processor =
+ getContentHashVersionProcessor(List.of("field*"), List.of());
+
+ // Given (doc for update)
+ SolrInputDocument inputDocument =
+ doc(
+ f(ID_FIELD, "0000000001"),
+ f(FIRST_FIELD, "constant to have a constant hash for field1"),
+ f(SECOND_FIELD, "constant to have a constant hash for field2"),
+ f(THIRD_FIELD, UUID.randomUUID().toString()),
+ f(FOURTH_FIELD, "constant to have a constant hash for field4"));
+
+ // Then
+ assertEquals("PozPs2qZQtw=", processor.computeDocHash(inputDocument));
+ }
+
+ @Test
+ public void shouldDedupIncludedFields() {
+ // Given (processor to include field1 and field2 only)
+ ContentHashVersionProcessor processorWithDuplicatedFieldName =
+ getContentHashVersionProcessor(List.of(FIRST_FIELD, FIRST_FIELD, SECOND_FIELD), List.of());
+ ContentHashVersionProcessor processorWithWildcard =
+ getContentHashVersionProcessor(
+ List.of( // Also change order of config (test reorder of field names)
+ SECOND_FIELD, FIRST_FIELD, "field1*"),
+ List.of());
+
+ // Given (doc for update)
+ SolrInputDocument inputDocument =
+ doc(
+ f(ID_FIELD, "0000000001"),
+ f(FIRST_FIELD, "constant to have a constant hash for field1"),
+ f(SECOND_FIELD, "constant to have a constant hash for field2"),
+ f(THIRD_FIELD, UUID.randomUUID().toString()),
+ f(FOURTH_FIELD, "constant to have a constant hash for field4"));
+
+ // Then
+ assertEquals("XavrOYGlkXM=", processorWithDuplicatedFieldName.computeDocHash(inputDocument));
+ assertEquals("XavrOYGlkXM=", processorWithWildcard.computeDocHash(inputDocument));
+ }
+
+ @Test
+ public void shouldCreateSignatureForNewDoc() throws Exception {
+ // When (update)
+ final String newDocId = UUID.randomUUID().toString();
+ assertU(
+ adoc(
+ ID_FIELD, newDocId,
+ FIRST_FIELD, INITIAL_FIELD1_VALUE,
+ SECOND_FIELD, INITIAL_FIELD2_VALUE));
+ assertU(commit());
+
+ // Then
+ final String hashFieldValueForNewDoc = getHashFieldValue(newDocId);
+ assertEquals(initialDocHash, hashFieldValueForNewDoc);
+ }
+
+ @Test
+ public void shouldAddToResponseLog() throws Exception {
+ // Given (command to update existing doc)
+ final String newDocId = UUID.randomUUID().toString();
+ final SolrQueryResponse update1 =
+ addDocWithResponse(
+ adoc(
+ ID_FIELD, newDocId,
+ FIRST_FIELD, INITIAL_FIELD1_VALUE,
+ SECOND_FIELD, INITIAL_FIELD2_VALUE),
+ "contenthashversion-default");
+ final SolrQueryResponse update2 =
+ addDocWithResponse(
+ adoc(
+ ID_FIELD, newDocId,
+ FIRST_FIELD, "This is a doc with values",
+ SECOND_FIELD, "that differs from stored doc, so it's considered new"),
+ "contenthashversion-default");
+ assertU(commit());
+
+ // Then
+ assertResponse(update1, 0, 0, 0);
+ assertResponse(update2, 0, 0, 1);
+ }
+
+ @Test
+ public void shouldKeepDuplicateDocumentsInLogMode() throws Exception {
+ // Given: Use log chain which detects but does NOT drop duplicates
+ final String docId = UUID.randomUUID().toString();
+
+ // When: Add a document
+ addDoc(
+ adoc(
+ ID_FIELD, docId,
+ FIRST_FIELD, "original value",
+ SECOND_FIELD, "original value 2"),
+ "contenthashversion-log");
+ assertU(commit());
+ String originalHash = getHashFieldValue(docId);
+
+ // When: Try to add the same content again (duplicate)
+ SolrQueryResponse duplicateResponse =
+ addDocWithResponse(
+ adoc(
+ ID_FIELD, docId,
+ FIRST_FIELD, "original value",
+ SECOND_FIELD, "original value 2"),
+ "contenthashversion-log");
+ assertU(commit());
+
+ // Then: Response should show duplicate was detected but NOT dropped
+ assertResponse(duplicateResponse, 0, 1, 0);
+
+ // Then: Document should still exist in index
+ assertQ(req("q", ID_FIELD + ":" + docId), "//result[@numFound='1']");
+
+ // Then: Document hash should remain unchanged (duplicate was processed)
+ String currentHash = getHashFieldValue(docId);
+ assertEquals("Hash should remain unchanged for duplicate", originalHash, currentHash);
+
+ // When: Update with different content
+ SolrQueryResponse changedResponse =
+ addDocWithResponse(
+ adoc(
+ ID_FIELD, docId,
+ FIRST_FIELD, "changed value",
+ SECOND_FIELD, "changed value 2"),
+ "contenthashversion-log");
+ assertU(commit());
+
+ // Then: Response should show content changed
+ assertResponse(changedResponse, 0, 0, 1);
+
+ // Then: Hash should be updated
+ String newHash = getHashFieldValue(docId);
+ assertNotEquals("Hash should change for different content", originalHash, newHash);
+ }
+
+ @Test
+ public void shouldExcludeFieldsUpdateSignatureForNewDoc() throws Exception {
+ // Given (update using URP chain WITHOUT drop doc (log mode))
+ final String newDocId = UUID.randomUUID().toString();
+ addDoc(
+ adoc(
+ ID_FIELD, newDocId,
+ FIRST_FIELD, INITIAL_FIELD1_VALUE,
+ SECOND_FIELD, INITIAL_FIELD2_VALUE),
+ "contenthashversion-default");
+ assertU(commit());
+
+ // Then
+ final String hashFieldValue = getHashFieldValue(newDocId);
+ assertEquals(initialDocHash, hashFieldValue);
+ }
+
+ @Test
+ public void shouldCommitWithDropModeEnabled() throws Exception {
+ // Initial document already exists from setUp()
+ // When: Try to add the same document again (duplicate content) using URP chain WITH drop doc
+ // (drop mode)
+ SolrQueryResponse solrQueryResponse =
+ addDocWithResponse(
+ adoc(
+ ID_FIELD, INITIAL_DOC_ID,
+ FIRST_FIELD, INITIAL_FIELD1_VALUE,
+ SECOND_FIELD, INITIAL_FIELD2_VALUE),
+ "contenthashversion-drop");
+ assertU(commit());
+
+ // Then: Verify response shows duplicate was dropped
+ assertResponse(solrQueryResponse, 1, 1, 0);
+
+ // Then: Verify document was NOT actually added/updated (still only 1 doc in index)
+ assertQ(req("q", "*:*"), "//result[@numFound='1']");
+
+ // Verify the document still has the original hash
+ String currentHash = getHashFieldValue(INITIAL_DOC_ID);
+ assertEquals("Document hash should not have changed", initialDocHash, currentHash);
+ }
+
+  @Test
+  public void shouldHandleDocumentWithOnlyIdField() {
+    // Given: a processor hashing all fields except the excluded id
+    final ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(List.of("*"), List.of(ID_FIELD));
+
+    // When: hashing a document that carries nothing but its id
+    final SolrInputDocument doc = doc(f(ID_FIELD, "only-id-doc"));
+    final String hash = processor.computeDocHash(doc);
+
+    // Then: even an empty set of hashable fields yields a usable hash
+    assertNotNull("Hash should not be null for ID-only document", hash);
+    assertFalse("Hash should not be empty", hash.isEmpty());
+  }
+
+  @Test
+  public void shouldHandleMultiValueFields() {
+    // Given: a processor that hashes every field except the id
+    final ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(List.of("*"), List.of(ID_FIELD));
+
+    // When: a document carries a multi-valued field
+    SolrInputDocument first = doc(f(ID_FIELD, "doc1"), f(FIRST_FIELD, "value1", "value2", "value3"));
+    String firstHash = processor.computeDocHash(first);
+
+    // Then: hashing succeeds
+    assertNotNull(firstHash);
+
+    // Then: identical values in identical order hash identically (the id is excluded)
+    SolrInputDocument second = doc(f(ID_FIELD, "doc2"), f(FIRST_FIELD, "value1", "value2", "value3"));
+    String secondHash = processor.computeDocHash(second);
+    assertEquals("Same multi-value field should produce same hash", firstHash, secondHash);
+
+    // Then: value order matters, so a permutation of the same values hashes differently
+    SolrInputDocument third = doc(f(ID_FIELD, "doc3"), f(FIRST_FIELD, "value3", "value1", "value2"));
+    String thirdHash = processor.computeDocHash(third);
+    assertNotEquals("Different order should produce different hash", firstHash, thirdHash);
+  }
+
+  @Test
+  public void shouldHandleNullFieldValues() {
+    // Given: a processor hashing everything but the id
+    final ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(List.of("*"), List.of(ID_FIELD));
+
+    // When: a field explicitly holds a null value
+    final SolrInputDocument doc = doc(f(ID_FIELD, "null-doc"), f(FIRST_FIELD, (Object) null));
+    final String hash = processor.computeDocHash(doc);
+
+    // Then: hashing neither throws nor yields a blank result
+    assertNotNull("Should handle null values", hash);
+    assertFalse("Hash should not be empty", hash.isEmpty());
+  }
+
+  @Test
+  public void shouldProduceSameHashRegardlessOfFieldOrder() {
+    // Given: a processor hashing everything but the id
+    final ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(List.of("*"), List.of(ID_FIELD));
+
+    // When: two documents carry the same fields, listed in different orders
+    final SolrInputDocument ascending =
+        doc(
+            f(ID_FIELD, "doc1"),
+            f(FIRST_FIELD, "value1"),
+            f(SECOND_FIELD, "value2"),
+            f(THIRD_FIELD, "value3"));
+    final SolrInputDocument shuffled =
+        doc(
+            f(ID_FIELD, "doc2"),
+            f(THIRD_FIELD, "value3"),
+            f(FIRST_FIELD, "value1"),
+            f(SECOND_FIELD, "value2"));
+
+    // Then: fields are sorted before hashing, so both hashes agree
+    assertEquals(
+        "Hash should be same regardless of field order",
+        processor.computeDocHash(ascending),
+        processor.computeDocHash(shuffled));
+  }
+
+  @Test
+  public void shouldHandleEmptyFieldValues() {
+    // Given: a processor hashing everything but the id
+    final ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(List.of("*"), List.of(ID_FIELD));
+
+    // When: hashing a document whose fields hold empty strings
+    final SolrInputDocument withEmptyValues =
+        doc(f(ID_FIELD, "empty-doc"), f(FIRST_FIELD, ""), f(SECOND_FIELD, ""));
+    final String emptyValuesHash = processor.computeDocHash(withEmptyValues);
+
+    // Then: a valid, non-blank hash is produced
+    assertNotNull("Should handle empty values", emptyValuesHash);
+    assertFalse("Hash should not be empty", emptyValuesHash.isEmpty());
+
+    // Then: empty-string fields are distinguishable from absent fields
+    final SolrInputDocument withoutFields = doc(f(ID_FIELD, "empty-doc"));
+    final String noFieldsHash = processor.computeDocHash(withoutFields);
+    assertNotEquals("Empty string fields should differ from no fields", emptyValuesHash, noFieldsHash);
+  }
+
+ private static void assertResponse(
+ SolrQueryResponse solrQueryResponse,
+ int droppedDocCount,
+ int duplicateDocCount,
+ int changedDocCount) {
+ assertNotNull(solrQueryResponse.getToLog().get("contentHash.duplicatesDropped"));
+ assertNotNull(solrQueryResponse.getToLog().get("contentHash.duplicatesDetected"));
+ assertNotNull(solrQueryResponse.getToLog().get("contentHash.changed"));
+
+ int droppedDocs = (int) solrQueryResponse.getToLog().get("contentHash.duplicatesDropped");
+ int duplicateDocs = (int) solrQueryResponse.getToLog().get("contentHash.duplicatesDetected");
+ int changedDocs = (int) solrQueryResponse.getToLog().get("contentHash.changed");
+ assertEquals(droppedDocCount, droppedDocs);
+ assertEquals(duplicateDocCount, duplicateDocs);
+ assertEquals(changedDocCount, changedDocs);
+ }
+}
diff --git a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
index 3ff7e74aaa2..9df7e06cb6c 100644
--- a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
+++ b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
@@ -1161,6 +1161,14 @@ public static String adoc(SolrInputDocument sdoc) {
   }
 
   public static void addDoc(String doc, String updateRequestProcessorChain) throws Exception {
+    addDocWithResponse(doc, updateRequestProcessorChain);
+  }
+
+  /**
+   * Indexes {@code doc} through the named update request processor chain and returns the response,
+   * so callers can assert values the chain recorded (e.g. via {@code getToLog()}).
+   */
+  public static SolrQueryResponse addDocWithResponse(String doc, String updateRequestProcessorChain)
+      throws Exception {
     Map params = new HashMap<>();
     MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
     params.put(UpdateParams.UPDATE_CHAIN, new String[] {updateRequestProcessorChain});
@@ -1169,8 +1174,14 @@ public static void addDoc(String doc, String updateRequestProcessorChain) throws
     UpdateRequestHandler handler = new UpdateRequestHandler();
     handler.init(null);
     req.setContentStreams(List.of(new ContentStreamBase.StringStream(doc)));
-    handler.handleRequestBody(req, new SolrQueryResponse());
-    req.close();
+    final SolrQueryResponse rsp = new SolrQueryResponse();
+    try {
+      handler.handleRequestBody(req, rsp);
+    } finally {
+      // Close the request even if the handler throws, so its resources are released.
+      req.close();
+    }
+    return rsp;
}
/**