From b11d049e3d4116767df095730b0b7b6cbf744674 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Wed, 25 Mar 2026 13:06:18 +0100 Subject: [PATCH 01/17] [llm-document-erichment] Add first revision of the feature --- .../model/SolrChatModel.java | 201 ++++++++++++++++ .../model/package-info.java | 19 ++ .../store/ChatModelException.java | 30 +++ .../store/ChatModelStore.java | 67 ++++++ .../store/package-info.java | 19 ++ .../store/rest/ManagedChatModelStore.java | 200 ++++++++++++++++ .../store/rest/package-info.java | 19 ++ .../DocumentEnrichmentUpdateProcessor.java | 97 ++++++++ ...umentEnrichmentUpdateProcessorFactory.java | 156 ++++++++++++ .../update/processor/package-info.java | 19 ++ .../dummy-chat-model-ambiguous.json | 8 + .../dummy-chat-model-unsupported.json | 8 + .../modelChatExamples/dummy-chat-model.json | 7 + .../exception-throwing-chat-model.json | 6 + .../mistralai-chat-model.json | 13 + .../modelChatExamples/openai-model.json | 13 + .../cohere-model.json | 0 .../dummy-model-ambiguous.json | 0 .../dummy-model-unsupported.json | 0 .../dummy-model.json | 0 .../exception-throwing-model.json | 0 .../huggingface-model.json | 0 .../mistralai-model.json | 0 .../openai-model.json | 0 .../conf/schema-language-models.xml | 2 + ...richment-update-request-processor-only.xml | 62 +++++ .../conf/solrconfig-document-enrichment.xml | 83 +++++++ .../languagemodels/TestLanguageModelBase.java | 44 +++- .../model/DummyChatModel.java | 80 +++++++ .../model/ExceptionThrowingChatModel.java | 48 ++++ .../store/rest/TestChatModelManager.java | 184 +++++++++++++++ .../rest/TestChatModelManagerPersistence.java | 101 ++++++++ ...stManagedChatModelStoreInitialization.java | 54 +++++ ...tEnrichmentUpdateProcessorFactoryTest.java | 222 ++++++++++++++++++ ...DocumentEnrichmentUpdateProcessorTest.java | 219 +++++++++++++++++ 35 files changed, 1973 insertions(+), 8 deletions(-) create mode 100644 
solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/package-info.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelException.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelStore.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/package-info.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/package-info.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/package-info.java create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-ambiguous.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-unsupported.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/mistralai-chat-model.json create mode 100644 
solr/modules/language-models/src/test-files/modelChatExamples/openai-model.json rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/cohere-model.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/dummy-model-ambiguous.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/dummy-model-unsupported.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/dummy-model.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/exception-throwing-model.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/huggingface-model.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/mistralai-model.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/openai-model.json (100%) create mode 100644 solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml create mode 100644 solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/ExceptionThrowingChatModel.java create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java create mode 100644 
solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java new file mode 100644 index 000000000000..9d06001e5903 --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.documentenrichment.model; + +import dev.langchain4j.data.message.UserMessage; +import dev.langchain4j.model.chat.ChatModel; + +import java.lang.invoke.MethodHandles; +import java.lang.reflect.Method; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Map; +import java.util.Objects; +import dev.langchain4j.model.chat.request.ChatRequest; +import dev.langchain4j.model.chat.response.ChatResponse; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.solr.common.SolrException; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.languagemodels.documentenrichment.store.ChatModelException; +import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This object wraps a {@link ChatModel} to produce the content of new fields from another. + * It's meant to be used as a managed resource with the {@link + * ManagedChatModelStore} + */ +public class SolrChatModel implements Accountable { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private static final long BASE_RAM_BYTES = + RamUsageEstimator.shallowSizeOfInstance(SolrChatModel.class); + // timeout is type Duration + private static final String TIMEOUT_PARAM = "timeout"; + + // the following are Integer type + private static final String MAX_RETRIES_PARAM = "maxRetries"; + private static final String THINKING_BUDGET_TOKENS ="thinkingBudgetTokens"; + private static final String RANDOM_SEED = "randomSeed"; + + private final String name; + private final Map params; + private final ChatModel chatModel; + private final int hashCode; + + public static SolrChatModel getInstance( + SolrResourceLoader solrResourceLoader, + String className, + String name, + Map params) + throws ChatModelException { + try { + /* + * The idea here is to 
build a {@link dev.langchain4j.model.chat.ChatModel} using inversion + * of control. + * Each model has its own list of parameters we don't know beforehand, but each {@link dev.langchain4j.model.chat.ChatModel} class + * has its own builder that uses setters with the same name of the parameter in input. + * */ + ChatModel textToTextModel; + Class modelClass = solrResourceLoader.findClass(className, ChatModel.class); + var builder = modelClass.getMethod("builder").invoke(null); + if (params != null) { + /* + * This block of code is responsible for instantiating a {@link + * dev.langchain4j.model.chat.ChatModel} using the params provided. + * The specific implementation of {@link + * dev.langchain4j.model.chat.ChatModel} is not known beforehand, so we rely on + * the design choice in langchain4j that the builder of each class implementing {@link + * dev.langchain4j.model.chat.ChatModel} exposes setters with the same name as the + * param. + */ + for (String paramName : params.keySet()) { + /* + * When a param is not primitive, we need to instantiate the object explicitly and then call the + * setter method. + * N.B. when adding support to new models, pay attention to all the parameters they + * support, some of them may require to be handled in here as separate switch cases + */ + switch (paramName) { + case TIMEOUT_PARAM -> builder + .getClass() + .getMethod(paramName, Duration.class) + .invoke(builder, Duration.ofSeconds((Long) params.get(paramName))); + + case MAX_RETRIES_PARAM, THINKING_BUDGET_TOKENS, RANDOM_SEED -> builder + .getClass() + .getMethod(paramName, Integer.class) + .invoke(builder, ((Long) params.get(paramName)).intValue()); + + /* + * For primitive params if there's only one setter available, we call it. 
+ * If there's choice we default to the string one + */ + default -> { + ArrayList paramNameMatches = new ArrayList<>(); + for (var method : builder.getClass().getMethods()) { + if (paramName.equals(method.getName()) && method.getParameterCount() == 1) { + paramNameMatches.add(method); + } + } + if (paramNameMatches.size() == 1) { + paramNameMatches.getFirst().invoke(builder, params.get(paramName)); + } else { + try { + builder + .getClass() + .getMethod(paramName, String.class) + .invoke(builder, params.get(paramName).toString()); + } catch (NoSuchMethodException e) { + log.error("Parameter {} not supported by model {}", paramName, className); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e); + } + } + } + } + } + } + textToTextModel = (ChatModel) builder.getClass().getMethod("build").invoke(builder); + return new SolrChatModel(name, textToTextModel, params); + } catch (final Exception e) { + throw new ChatModelException("Model loading failed for " + className, e); + } + } + + public SolrChatModel( + String name, ChatModel chatModel, Map params) { + this.name = name; + this.chatModel = chatModel; + this.params = params; + this.hashCode = calculateHashCode(); + } + + public String chat(String text){ + ChatRequest chatRequest = ChatRequest.builder() + //.responseFormat(responseFormat) // used for structured outputs + .messages(UserMessage.from(text)) + .build(); + ChatResponse chatResponse = chatModel.chat(chatRequest); + return chatResponse.aiMessage().text(); // To change in case of structured output support + } + + @Override + public String toString() { + return getClass().getSimpleName() + "(name=" + getName() + ")"; + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES + + RamUsageEstimator.sizeOfObject(name) + + RamUsageEstimator.sizeOfObject(chatModel); + } + + @Override + public int hashCode() { + return hashCode; + } + + private int calculateHashCode() { + final int prime = 31; + int result = 1; + result = 
(prime * result) + Objects.hashCode(name); + result = (prime * result) + Objects.hashCode(chatModel); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!(obj instanceof SolrChatModel)) return false; + final SolrChatModel other = (SolrChatModel) obj; + return Objects.equals(chatModel, other.chatModel) && Objects.equals(name, other.name); + } + + public String getName() { + return name; + } + + public String getChatModelClassName() { + return chatModel.getClass().getName(); + } + + public Map getParams() { + return params; + } +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/package-info.java new file mode 100644 index 000000000000..9b1575f35d58 --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** APIs and classes for wrapping the chat models used for document enrichment. 
*/ +package org.apache.solr.languagemodels.documentenrichment.model; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelException.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelException.java new file mode 100644 index 000000000000..a3315faaa234 --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelException.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.documentenrichment.store; + +public class ChatModelException extends RuntimeException { + + private static final long serialVersionUID = 1L; + + public ChatModelException(String message) { + super(message); + } + + public ChatModelException(String message, Exception cause) { + super(message, cause); + } +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelStore.java new file mode 100644 index 000000000000..96105919c17d --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelStore.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.documentenrichment.store; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel; + +/** Simple store to manage CRUD operations on the {@link SolrChatModel} */ +public class ChatModelStore { + + private final Map availableModels; + + public ChatModelStore() { + availableModels = Collections.synchronizedMap(new LinkedHashMap<>()); + } + + public SolrChatModel getModel(String name) { + return availableModels.get(name); + } + + public void clear() { + availableModels.clear(); + } + + public List getModels() { + synchronized (availableModels) { + final List availableModelsValues = + new ArrayList<>(availableModels.values()); + return Collections.unmodifiableList(availableModelsValues); + } + } + + @Override + public String toString() { + return "ChatModelStore [availableModels=" + availableModels.keySet() + "]"; + } + + public SolrChatModel delete(String modelName) { + return availableModels.remove(modelName); + } + + public void addModel(SolrChatModel modeldata) throws ChatModelException { + final String name = modeldata.getName(); + if (availableModels.putIfAbsent(modeldata.getName(), modeldata) != null) { + throw new ChatModelException( + "model '" + name + "' already exists. Please use a different name"); + } + } +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/package-info.java new file mode 100644 index 000000000000..ec20da4f87ee --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Contains model store related classes. */ +package org.apache.solr.languagemodels.documentenrichment.store; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java new file mode 100644 index 000000000000..f8c6414354d8 --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.languagemodels.documentenrichment.store.rest;
+
+import java.lang.invoke.MethodHandles;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import net.jcip.annotations.ThreadSafe;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel;
+import org.apache.solr.languagemodels.documentenrichment.store.ChatModelException;
+import org.apache.solr.languagemodels.documentenrichment.store.ChatModelStore;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.rest.BaseSolrResource;
+import org.apache.solr.rest.ManagedResource;
+import org.apache.solr.rest.ManagedResourceObserver;
+import org.apache.solr.rest.ManagedResourceStorage;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Managed Resource wrapper for the {@link ChatModelStore} to expose it via REST */
+@ThreadSafe
+public class ManagedChatModelStore extends ManagedResource
+    implements ManagedResource.ChildResourceSupport {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  /** the model store rest endpoint */
+  public static final String REST_END_POINT = "/schema/chat-model-store";
+
+  /** Managed model store: the name of the attribute containing all the models of a model store */
+  private static final String MODELS_JSON_FIELD = "models";
+
+  /** name of the attribute containing a class */
+  static final String CLASS_KEY = "class";
+
+  /** name of the attribute containing a name */
+  static final String NAME_KEY = "name";
+
+  /** name of the attribute containing parameters */
+  static final String PARAMS_KEY = "params";
+
+  /**
+   * Registers this managed resource class under {@link #REST_END_POINT} in the managed resource
+   * registry of the given resource loader.
+   *
+   * @param solrResourceLoader the loader whose registry receives the registration
+   * @param managedResourceObserver the observer handed over together with the registration
+   */
+  public static void registerManagedChatModelStore(
+      SolrResourceLoader solrResourceLoader, ManagedResourceObserver managedResourceObserver) {
+    solrResourceLoader
+        .getManagedResourceRegistry()
+        .registerManagedResource(
+            REST_END_POINT, ManagedChatModelStore.class, managedResourceObserver);
+  }
+
+  /**
+   * Looks up the managed resource registered under {@link #REST_END_POINT} in the REST manager of
+   * the given core.
+   *
+   * @param core the core whose REST manager is queried
+   * @return the resource found under {@link #REST_END_POINT}, cast to this class
+   */
+  public static ManagedChatModelStore getManagedModelStore(SolrCore core) {
+    return (ManagedChatModelStore) core.getRestManager().getManagedResource(REST_END_POINT);
+  }
+
+  /**
+   * Returns the available models as a list of maps. After an update the managed resource needs to
+   * return the models in this format so that they can be stored as JSON somewhere (ZooKeeper,
+   * disk...).
+   *
+   * @param models the models to convert
+   * @return one map per model, as produced by {@link #toModelMap(SolrChatModel)}
+   */
+  private static List<Object> modelsAsManagedResources(List<SolrChatModel> models) {
+    return models.stream().map(ManagedChatModelStore::toModelMap).collect(Collectors.toList());
+  }
+
+  /**
+   * Builds a model from its map representation, reading the {@link #CLASS_KEY}, {@link #NAME_KEY}
+   * and {@link #PARAMS_KEY} entries.
+   *
+   * @param solrResourceLoader the loader passed on to {@link SolrChatModel#getInstance}
+   * @param chatModel the map describing the model
+   * @return the model returned by {@link SolrChatModel#getInstance}
+   */
+  @SuppressWarnings("unchecked")
+  public static SolrChatModel fromModelMap(
+      SolrResourceLoader solrResourceLoader, Map<String, Object> chatModel) {
+    return SolrChatModel.getInstance(
+        solrResourceLoader,
+        (String) chatModel.get(CLASS_KEY), // modelClassName
+        (String) chatModel.get(NAME_KEY), // modelName
+        (Map<String, Object>) chatModel.get(PARAMS_KEY));
+  }
+
+  /**
+   * Converts a model into the map representation read back by {@link #fromModelMap}; the entries
+   * are inserted in the order name, class, params.
+   */
+  private static LinkedHashMap<String, Object> toModelMap(SolrChatModel model) {
+    final LinkedHashMap<String, Object> modelMap = new LinkedHashMap<>(5, 1.0f);
+    modelMap.put(NAME_KEY, model.getName());
+    modelMap.put(CLASS_KEY, model.getChatModelClassName());
+    modelMap.put(PARAMS_KEY, model.getParams());
+    return modelMap;
+  }
+
+  private final ChatModelStore store;
+
+  /** Raw data handed over by the storage; turned into models by {@link #loadStoredModels()}. */
+  private Object managedData;
+
+  /** Creates the managed resource backed by a new, empty {@link ChatModelStore}. */
+  public ManagedChatModelStore(
+      String resourceId, SolrResourceLoader loader, ManagedResourceStorage.StorageIO storageIO)
+      throws SolrException {
+    super(resourceId, loader, storageIO);
+    store = new ChatModelStore();
+  }
+
+  /** Persists the managed data through a {@link ManagedResourceStorage.JsonStorage}. */
+  @Override
+  protected ManagedResourceStorage createStorage(
+      ManagedResourceStorage.StorageIO storageIO, SolrResourceLoader loader)
+      throws SolrException {
+    // NOTE(review): the third argument is presumably the JSON indent size - confirm against
+    // JsonStorage
+    return new ManagedResourceStorage.JsonStorage(storageIO, loader, -1);
+  }
+
+  /**
+   * Empties the store and keeps a reference to the loaded data. The models themselves are only
+   * created later, when {@link #loadStoredModels()} is called.
+   */
+  @Override
+  protected void onManagedDataLoadedFromStorage(NamedList<?> managedInitArgs, Object managedData)
+      throws SolrException {
+    store.clear();
+    this.managedData = managedData;
+  }
+
+  /**
+   * Adds to the store one model for each map found in the data previously received by {@link
+   * #onManagedDataLoadedFromStorage}. Nothing is added when that data is not a {@link List}.
+   */
+  public void loadStoredModels() {
+    log.info("------ managed models ~ loading ------");
+
+    // instanceof is false for null, so no separate null check is needed
+    if (managedData instanceof List) {
+      @SuppressWarnings({"unchecked"})
+      final List<Map<String, Object>> chatModels = (List<Map<String, Object>>) managedData;
+      for (final Map<String, Object> chatModel : chatModels) {
+        addModelFromMap(chatModel);
+      }
+    }
+  }
+
+  /**
+   * Builds a model from the given map and adds it to the store.
+   *
+   * @throws SolrException with code BAD_REQUEST wrapping any {@link ChatModelException}
+   */
+  private void addModelFromMap(Map<String, Object> modelMap) {
+    try {
+      addModel(fromModelMap(solrResourceLoader, modelMap));
+    } catch (final ChatModelException e) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
+    }
+  }
+
+  /**
+   * Adds the given model to the store.
+   *
+   * @param model the model to add
+   * @throws SolrException with code BAD_REQUEST wrapping any {@link ChatModelException} raised by
+   *     the store
+   */
+  public void addModel(SolrChatModel model) throws SolrException {
+    try {
+      if (log.isInfoEnabled()) {
+        log.info("adding model {}", model.getName());
+      }
+      store.addModel(model);
+    } catch (final ChatModelException e) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
+    }
+  }
+
+  /**
+   * Adds the model(s) described by {@code updates} to the store: a {@link List} is read as a list
+   * of model maps, a {@link Map} as a single model map; any other value (including {@code null})
+   * adds nothing.
+   *
+   * @return the map representation of all the models in the store after the update
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  protected Object applyUpdatesToManagedData(Object updates) {
+    if (updates instanceof List) {
+      final List<Map<String, Object>> chatModels = (List<Map<String, Object>>) updates;
+      for (final Map<String, Object> chatModel : chatModels) {
+        addModelFromMap(chatModel);
+      }
+    }
+
+    if (updates instanceof Map) {
+      final Map<String, Object> map = (Map<String, Object>) updates;
+      addModelFromMap(map);
+    }
+
+    return modelsAsManagedResources(store.getModels());
+  }
+
+  /** Deletes the model named {@code childId} from the store and persists the remaining models. */
+  @Override
+  public void doDeleteChild(BaseSolrResource endpoint, String childId) {
+    store.delete(childId);
+    // a null update adds nothing: the call only produces the current content of the store
+    storeManagedData(applyUpdatesToManagedData(null));
+  }
+
+  /**
+   * Called to retrieve a named part (the given childId) of the resource at the given endpoint.
+   * Note: since we have a unique child managed store we ignore the childId.
+   */
+  @Override
+  public void doGet(BaseSolrResource endpoint, String childId) {
+    final SolrQueryResponse response = endpoint.getSolrResponse();
+    response.add(MODELS_JSON_FIELD, modelsAsManagedResources(store.getModels()));
+  }
+
+  /** Returns whatever the underlying store returns for the given model name. */
+  public SolrChatModel getModel(String modelName) {
+    return store.getModel(modelName);
+  }
+
+  @Override
+  public String toString() {
+    return "ManagedChatModelStore [store=" + store + "]";
+  }
+}
diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/package-info.java
new file mode 100644
index 000000000000..dfb013a8a902
--- /dev/null
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/package-info.java
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Contains the {@link org.apache.solr.rest.ManagedResource} that encapsulates the model stores.
*/ +package org.apache.solr.languagemodels.documentenrichment.store.rest; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java new file mode 100644 index 000000000000..a50160924e96 --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.solr.languagemodels.documentenrichment.update.processor;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.processor.UpdateRequestProcessor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Update processor that sends the content of one field of each added document to a chat model and
+ * stores the model answer in another field of the same document. A failure of the model call is
+ * logged and the document continues down the chain without the output field being set.
+ */
+class DocumentEnrichmentUpdateProcessor extends UpdateRequestProcessor {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private final IndexSchema schema;
+  private final String inputField;
+  private final String outputField;
+  private final String prompt;
+  private final SolrChatModel chatModel;
+
+  /**
+   * @param inputField name of the document field whose content is injected in the prompt
+   * @param outputField name of the document field that receives the model response
+   * @param prompt the prompt template; every occurrence of "{input}" is replaced with the content
+   *     of the input field
+   * @param chatModel the model the injected prompt is sent to
+   * @param req the request, used to obtain the schema
+   * @param next the next processor in the chain
+   */
+  public DocumentEnrichmentUpdateProcessor(
+      String inputField,
+      String outputField,
+      String prompt,
+      SolrChatModel chatModel,
+      SolrQueryRequest req,
+      UpdateRequestProcessor next) {
+    super(next);
+    this.schema = req.getSchema();
+    // prompt must contain "{input}" where the user wants to inject the input data to populate
+    // outputField
+    this.prompt = prompt;
+    this.inputField = inputField;
+    this.outputField = outputField;
+    this.chatModel = chatModel;
+  }
+
+  /**
+   * Enriches the document when its input field has content, then always hands the command over to
+   * the next processor.
+   *
+   * @param cmd the update command in input containing the Document to process
+   * @throws IOException If there is a low-level I/O error
+   */
+  @Override
+  public void processAdd(AddUpdateCommand cmd) throws IOException {
+    SolrInputDocument doc = cmd.getSolrInputDocument();
+    SolrInputField inputFieldContent = doc.get(inputField);
+    if (!isNullOrEmpty(inputFieldContent)) {
+      try {
+        // as for now, only a plain text as prompt is sent to the model (no support for
+        // tools/skills/agents)
+        String toInject = inputFieldContent.getValue().toString();
+        String injectedPrompt = prompt.replace("{input}", toInject);
+        String response = chatModel.chat(injectedPrompt);
+        /* TODO: check if the outputField is multivalued and adapt the code/llm call to deal with
+        lists also, together with structured output support
+         */
+        doc.setField(outputField, response);
+      } catch (RuntimeException chatModelFailure) {
+        // best effort: the document is still indexed, just without the enriched field
+        logEnrichmentFailure(doc, chatModelFailure);
+      }
+    }
+    super.processAdd(cmd);
+  }
+
+  /**
+   * Logs a failed enrichment together with the unique key of the document. A schema without a
+   * unique key field is tolerated: the key name and value are then logged as null, so that the
+   * error handler itself cannot fail the update with a NullPointerException.
+   */
+  private void logEnrichmentFailure(SolrInputDocument doc, RuntimeException chatModelFailure) {
+    if (log.isErrorEnabled()) {
+      SchemaField uniqueKeyField = schema.getUniqueKeyField();
+      String uniqueKeyFieldName = uniqueKeyField == null ? null : uniqueKeyField.getName();
+      Object uniqueKeyValue =
+          uniqueKeyFieldName == null ? null : doc.getFieldValue(uniqueKeyFieldName);
+      log.error(
+          "Could not process: {} for the document with {}: {}",
+          inputField,
+          uniqueKeyFieldName,
+          uniqueKeyValue,
+          chatModelFailure);
+    }
+  }
+
+  /**
+   * @return true when the field is missing, has a null value, or its value has an empty string
+   *     representation
+   */
+  protected boolean isNullOrEmpty(SolrInputField inputFieldContent) {
+    return (inputFieldContent == null
+        || inputFieldContent.getValue() == null
+        || inputFieldContent.getValue().toString().isEmpty());
+  }
+}
diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java
new file mode 100644
index 000000000000..b40904f55aca
--- /dev/null
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.languagemodels.documentenrichment.update.processor; + +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.RequiredSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel; +import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.schema.StrField; +import org.apache.solr.schema.TextField; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.update.processor.UpdateRequestProcessor; +import org.apache.solr.update.processor.UpdateRequestProcessorFactory; +import org.apache.solr.util.plugin.SolrCoreAware; + +/** + * Insert in an existing field the output of the model coming from a textual field value. + * + *

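<p>The parameters supported are:
+ *
+ * <pre class="prettyprint">
+ * &lt;processor class="solr.llm.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessorFactory"&gt;
+ *   &lt;str name="inputField"&gt;textualField&lt;/str&gt;
+ *   &lt;str name="outputField"&gt;anotherTextualField&lt;/str&gt;
+ *   &lt;str name="prompt"&gt;Summarize this content: {input}&lt;/str&gt;
+ *   &lt;str name="model"&gt;ChatModel&lt;/str&gt;
+ * &lt;/processor&gt;
+ * </pre>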
+ * + * + * * + */ +public class DocumentEnrichmentUpdateProcessorFactory extends UpdateRequestProcessorFactory + implements SolrCoreAware, ManagedResourceObserver { + private static final String INPUT_FIELD_PARAM = "inputField"; + private static final String OUTPUT_FIELD_PARAM = "outputField"; + private static final String PROMPT = "prompt"; + private static final String MODEL_NAME = "model"; + private ManagedChatModelStore modelStore = null; + + private String inputField; // TODO: change with a list of input fields (check how it's done in other UpdateProcessor that supports this behaviour) + private String outputField; + private String prompt; + private String modelName; + private SolrParams params; + + @Override + public void init(final NamedList args) { + params = args.toSolrParams(); + RequiredSolrParams required = params.required(); + inputField = required.get(INPUT_FIELD_PARAM); + outputField = required.get(OUTPUT_FIELD_PARAM); + prompt = required.get(PROMPT); + if (!prompt.contains("{input}")) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "prompt must contain {input} placeholder"); + } + modelName = required.get(MODEL_NAME); + } + + @Override + public void inform(SolrCore core) { + final SolrResourceLoader solrResourceLoader = core.getResourceLoader(); + ManagedChatModelStore.registerManagedChatModelStore(solrResourceLoader, this); + } + + @Override + public void onManagedResourceInitialized(NamedList args, ManagedResource res) + throws SolrException { + if (res instanceof ManagedChatModelStore) { + modelStore = (ManagedChatModelStore) res; + } + if (modelStore != null) { + modelStore.loadStoredModels(); + } + } + + @Override + public UpdateRequestProcessor getInstance( + SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { + IndexSchema latestSchema = req.getCore().getLatestSchema(); + + if (!latestSchema.isDynamicField(inputField) && !latestSchema.hasExplicitField(inputField)) { + throw new SolrException( + 
SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + inputField + "\""); + } + + final SchemaField outputFieldSchema = latestSchema.getField(outputField); + assertIsTextualField(outputFieldSchema); + + ManagedChatModelStore modelStore = + ManagedChatModelStore.getManagedModelStore(req.getCore()); + SolrChatModel chatModel = modelStore.getModel(modelName); + if (chatModel == null) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "The model configured in the Update Request Processor '" + + modelName + + "' can't be found in the store: " + + ManagedChatModelStore.REST_END_POINT); + } + + return new DocumentEnrichmentUpdateProcessor(inputField, outputField, prompt, chatModel, req, next); + } + // This is used on the outputField. Now the support is limited. Can be changed with structured outputs. + protected void assertIsTextualField(SchemaField schemaField) { + FieldType fieldType = schemaField.getType(); + if (!(fieldType instanceof StrField) && !(fieldType instanceof TextField)) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "only textual fields are compatible with Document Enrichment: " + + schemaField.getName()); + } + } + + public String getInputField() { + return inputField; + } + + public String getOutputField() { + return outputField; + } + + public String getPrompt() { + return prompt; + } + + public String getModelName() { + return modelName; + } +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/package-info.java new file mode 100644 index 000000000000..1aaedcf004fd --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor 
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Contains update request processor related classes. */ +package org.apache.solr.languagemodels.documentenrichment.update.processor; diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-ambiguous.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-ambiguous.json new file mode 100644 index 000000000000..1d737c9ae9d2 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-ambiguous.json @@ -0,0 +1,8 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-chat-1", + "params": { + "response": "enriched content", + "ambiguous": 10 + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-unsupported.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-unsupported.json new file mode 100644 index 000000000000..5f3404982b90 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-unsupported.json @@ -0,0 +1,8 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-chat-1", + "params": { + "response": 
"enriched content", + "unsupported": 10 + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json new file mode 100644 index 000000000000..f331535d5e9f --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-chat-1", + "params": { + "response": "enriched content" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json new file mode 100644 index 000000000000..29bcce318ada --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json @@ -0,0 +1,6 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.ExceptionThrowingChatModel", + "name": "exception-throwing-chat-model", + "params": { + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/mistralai-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/mistralai-chat-model.json new file mode 100644 index 000000000000..b8a130191ceb --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/mistralai-chat-model.json @@ -0,0 +1,13 @@ +{ + "class": "dev.langchain4j.model.mistralai.MistralAiChatModel", + "name": "mistralai-chat-1", + "params": { + "baseUrl": "https://api.mistral.ai/v1", + "apiKey": "apiKey-mistralAI", + "modelName": "mistral-small-latest", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} \ No newline at end of file diff --git 
a/solr/modules/language-models/src/test-files/modelChatExamples/openai-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/openai-model.json new file mode 100644 index 000000000000..74ffde65e3b6 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/openai-model.json @@ -0,0 +1,13 @@ +{ + "class": "dev.langchain4j.model.openai.OpenAiChatModel", + "name": "openai-1", + "params": { + "baseUrl": "https://api.openai.com/v1", + "apiKey": "apiKey-openAI", + "modelName": "gpt-5.4-nano", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} diff --git a/solr/modules/language-models/src/test-files/modelExamples/cohere-model.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/cohere-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/cohere-model.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/cohere-model.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/dummy-model-ambiguous.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/dummy-model-ambiguous.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/dummy-model-ambiguous.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/dummy-model-ambiguous.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/dummy-model-unsupported.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/dummy-model-unsupported.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/dummy-model-unsupported.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/dummy-model-unsupported.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/dummy-model.json 
b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/dummy-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/dummy-model.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/dummy-model.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/exception-throwing-model.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/exception-throwing-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/exception-throwing-model.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/exception-throwing-model.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/huggingface-model.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/huggingface-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/huggingface-model.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/huggingface-model.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/mistralai-model.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/mistralai-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/mistralai-model.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/mistralai-model.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/openai-model.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/openai-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/openai-model.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/openai-model.json diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml 
b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml index ef93fbc057dd..5334762cc388 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml @@ -36,11 +36,13 @@ + + diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml new file mode 100644 index 000000000000..522fbfe09267 --- /dev/null +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml @@ -0,0 +1,62 @@ + + + + + ${tests.luceneMatchVersion:LATEST} + ${solr.data.dir:} + + + + + + + + + + + + + + + 15000 + false + + + 1000 + + + ${solr.data.dir:} + + + + + + explicit + json + true + id + + + + + + string_field + enriched_field + Summarize this content: {input} + dummy-chat-1 + + + + + diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml new file mode 100644 index 000000000000..02015f6296ab --- /dev/null +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml @@ -0,0 +1,83 @@ + + + + + ${tests.luceneMatchVersion:LATEST} + ${solr.data.dir:} + + + + + + + + + + + + + + + 15000 + false + + + 1000 + + + ${solr.data.dir:} + + + + + + explicit + json + true + id + + + + + + string_field + enriched_field + Summarize this content: {input} + dummy-chat-1 + + + + + + + string_field + enriched_field + Summarize this content: {input} + exception-throwing-chat-model + + + + + + + + string_field + enriched_field + Summarize this 
content: {input} + dummy-chat-1 + + + + + diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java index aaf3143e3513..d7a4ac9b8c96 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java @@ -26,6 +26,7 @@ import java.util.List; import org.apache.commons.io.file.PathUtils; import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore; import org.apache.solr.languagemodels.textvectorisation.store.rest.ManagedTextToVectorModelStore; import org.apache.solr.util.RestTestBase; import org.slf4j.Logger; @@ -38,11 +39,13 @@ public class TestLanguageModelBase extends RestTestBase { protected static Path tmpSolrHome; protected static Path tmpConfDir; - public static final String MODEL_FILE_NAME = "_schema_text-to-vector-model-store.json"; + public static final String EMBEDDING_MODEL_FILE_NAME = "_schema_text-to-vector-model-store.json"; + public static final String CHAT_MODEL_FILE_NAME = "_schema_chat-model-store.json"; protected static final String COLLECTION = "collection1"; protected static final String CONF_DIR = COLLECTION + "/conf"; protected static Path embeddingModelStoreFile = null; + protected static Path chatModelStoreFile = null; protected static String IDField = "id"; protected static String vectorField = "vector"; @@ -61,17 +64,26 @@ protected static void initFolders(boolean isPersistent) throws Exception { tmpSolrHome = createTempDir(); tmpConfDir = tmpSolrHome.resolve(CONF_DIR); PathUtils.copyDirectory(TEST_PATH(), tmpSolrHome.toAbsolutePath()); - final Path modelStore = tmpConfDir.resolve(MODEL_FILE_NAME); + final Path embeddingStore = tmpConfDir.resolve(EMBEDDING_MODEL_FILE_NAME); 
+ final Path chatStore = tmpConfDir.resolve(CHAT_MODEL_FILE_NAME); if (isPersistent) { - embeddingModelStoreFile = modelStore; + embeddingModelStoreFile = embeddingStore; + chatModelStoreFile = chatStore; } - if (Files.exists(modelStore)) { + if (Files.exists(embeddingStore)) { if (log.isInfoEnabled()) { - log.info("remove model store config file in {}", modelStore.toAbsolutePath()); + log.info("remove model store config file in {}", embeddingStore.toAbsolutePath()); } - Files.delete(modelStore); + Files.delete(embeddingStore); + } + + if (Files.exists(chatStore)) { + if (log.isInfoEnabled()) { + log.info("remove chat model store config file in {}", chatStore.toAbsolutePath()); + } + Files.delete(chatStore); } System.setProperty("managed.schema.mutable", "true"); @@ -90,7 +102,7 @@ protected static void afterTest() throws Exception { } public static void loadModel(String fileName, String status) throws Exception { - final URL url = TestLanguageModelBase.class.getResource("/modelExamples/" + fileName); + final URL url = TestLanguageModelBase.class.getResource("/modelEmbeddingExamples/" + fileName); final String multipleModels = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8); assertJPut( @@ -100,13 +112,29 @@ public static void loadModel(String fileName, String status) throws Exception { } public static void loadModel(String fileName) throws Exception { - final URL url = TestLanguageModelBase.class.getResource("/modelExamples/" + fileName); + final URL url = TestLanguageModelBase.class.getResource("/modelEmbeddingExamples/" + fileName); final String multipleModels = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8); assertJPut( ManagedTextToVectorModelStore.REST_END_POINT, multipleModels, "/responseHeader/status==0"); } + public static void loadChatModel(String fileName, String status) throws Exception { + final URL url = TestLanguageModelBase.class.getResource("/modelChatExamples/" + fileName); + final String model = 
Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8); + + assertJPut( + ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==" + status); + } + + public static void loadChatModel(String fileName) throws Exception { + final URL url = TestLanguageModelBase.class.getResource("/modelChatExamples/" + fileName); + final String model = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8); + + assertJPut( + ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==0"); + } + protected static void prepareIndex() throws Exception { List docsToIndex = prepareDocs(); for (SolrInputDocument doc : docsToIndex) { diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java new file mode 100644 index 000000000000..753150cb6f02 --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.solr.languagemodels.documentenrichment.model;
+
+import dev.langchain4j.data.message.AiMessage;
+import dev.langchain4j.model.chat.ChatModel;
+import dev.langchain4j.model.chat.request.ChatRequest;
+import dev.langchain4j.model.chat.response.ChatResponse;
+
+/**
+ * A deterministic {@link ChatModel} for testing. It returns a fixed response string regardless of
+ * the input, allowing tests to assert exact enriched-field values without real API calls.
+ *
+ * <p>The builder also exposes {@code unsupported} and {@code ambiguous} setter methods to exercise
+ * the reflection-based parameter handling in {@link
+ * org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel#getInstance}.
+ */
+public class DummyChatModel implements ChatModel {
+
+  private final String response;
+
+  public DummyChatModel(String response) {
+    this.response = response;
+  }
+
+  @Override
+  public ChatResponse chat(ChatRequest chatRequest) {
+    return ChatResponse.builder().aiMessage(AiMessage.from(response)).build();
+  }
+
+  public static DummyChatModelBuilder builder() {
+    return new DummyChatModelBuilder();
+  }
+
+  public static class DummyChatModelBuilder {
+    private String response = "dummy response";
+    private int intValue; // only written by the ambiguous(...) setters; build() does not read it
+
+    public DummyChatModelBuilder() {}
+
+    public DummyChatModelBuilder response(String response) {
+      this.response = response;
+      return this;
+    }
+
+    /** Intentionally has no String overload so the reflection code raises a BAD_REQUEST error. */
+    public DummyChatModelBuilder unsupported(Integer input) {
+      return this;
+    }
+
+    /** Two overloads make this param "ambiguous": the reflection code should default to String. */
+    public DummyChatModelBuilder ambiguous(int input) {
+      this.intValue = input;
+      return this;
+    }
+
+    public DummyChatModelBuilder ambiguous(String input) {
+      this.intValue = Integer.parseInt(input); // parse straight to int, no Integer boxing round-trip
+      return this;
+    }
+
+    public DummyChatModel build() {
+      return new DummyChatModel(this.response);
+    }
+  }
+}
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/ExceptionThrowingChatModel.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/ExceptionThrowingChatModel.java
new file mode 100644
index 000000000000..e5eda8d493f1
--- /dev/null
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/ExceptionThrowingChatModel.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.languagemodels.documentenrichment.model;
+
+import dev.langchain4j.model.chat.ChatModel;
+import dev.langchain4j.model.chat.request.ChatRequest;
+import dev.langchain4j.model.chat.response.ChatResponse;
+
+/**
+ * A {@link ChatModel} that always throws a {@link RuntimeException}. Used to verify that {@link
+ * org.apache.solr.languagemodels.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessor}
+ * handles chat-model failures gracefully (logs the error and continues indexing without the
+ * enriched field).
+ */
+public class ExceptionThrowingChatModel implements ChatModel {
+
+  @Override
+  public ChatResponse chat(ChatRequest chatRequest) {
+    throw new RuntimeException("Failed to enrich");
+  }
+
+  public static ExceptionThrowingChatModelBuilder builder() {
+    return new ExceptionThrowingChatModelBuilder();
+  }
+
+  public static class ExceptionThrowingChatModelBuilder {
+
+    public ExceptionThrowingChatModelBuilder() {}
+
+    public ExceptionThrowingChatModel build() {
+      return new ExceptionThrowingChatModel();
+    }
+  }
+}
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java
new file mode 100644
index 000000000000..60e97e5a6f19
--- /dev/null
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.languagemodels.documentenrichment.store.rest;
+
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.languagemodels.TestLanguageModelBase;
+import org.apache.solr.languagemodels.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessorFactory;
+import org.apache.solr.rest.ManagedResource;
+import org.apache.solr.rest.ManagedResourceStorage;
+import org.apache.solr.rest.RestManager;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestChatModelManager extends TestLanguageModelBase {
+
+  @BeforeClass
+  public static void init() throws Exception {
+    setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, false);
+  }
+
+  @AfterClass
+  public static void cleanup() throws Exception {
+    afterTest();
+  }
+
+  @Test
+  public void test() throws Exception {
+    final SolrResourceLoader loader = new SolrResourceLoader(tmpSolrHome);
+
+    final RestManager.Registry registry = loader.getManagedResourceRegistry();
+    assertNotNull(
+        "Expected a non-null RestManager.Registry from the SolrResourceLoader!", registry);
+
+    final String resourceId = "/schema/mstore1";
+    registry.registerManagedResource(
+        resourceId, ManagedChatModelStore.class, new DocumentEnrichmentUpdateProcessorFactory());
+
+    final NamedList initArgs = new NamedList<>();
+
+    final RestManager restManager = new RestManager();
+    restManager.init(loader, initArgs, new ManagedResourceStorage.InMemoryStorageIO());
+
+    final ManagedResource res = restManager.getManagedResource(resourceId);
+    assertTrue(res instanceof ManagedChatModelStore);
+    assertEquals(resourceId, res.getResourceId()); // JUnit order: expected first, then actual
+  }
+
+  @Test
+  public void testRestManagerEndpoints() throws Exception {
+    assertJQ("/schema/managed", "/responseHeader/status==0");
+
+    final String openAiClassName = "dev.langchain4j.model.openai.OpenAiChatModel";
+
+    // fails — no params provided
+    String model = "{ \"name\":\"testChatModel1\", \"class\":\"" + openAiClassName + "\"}";
+    assertJPut(ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==400");
+
+    // success
+    model =
+        "{ name:\"testChatModel2\", class:\""
+            + openAiClassName
+            + "\","
+            + "params:{"
+            + "baseUrl:\"https://api.openai.com/v1\","
+            + "apiKey:\"testApiKey2\","
+            + "modelName:\"gpt-4o-mini\","
+            + "logRequests:true,"
+            + "logResponses:false"
+            + "}}";
+    assertJPut(ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==0");
+
+    // success — multiple models in one PUT
+    final String multipleModels =
+        "[{ name:\"testChatModel3\", class:\""
+            + openAiClassName
+            + "\","
+            + "params:{baseUrl:\"https://api.openai.com/v1\","
+            + "apiKey:\"testApiKey3\","
+            + "modelName:\"gpt-4o-mini\","
+            + "logRequests:true,"
+            + "logResponses:false"
+            + "}}\n"
+            + ",{ name:\"testChatModel4\", class:\""
+            + openAiClassName
+            + "\","
+            + "params:{baseUrl:\"https://api.openai.com/v1\","
+            + "apiKey:\"testApiKey4\","
+            + "modelName:\"gpt-4o-mini\","
+            + "logRequests:true,"
+            + "logResponses:false"
+            + "}}]";
+    assertJPut(ManagedChatModelStore.REST_END_POINT, multipleModels, "/responseHeader/status==0");
+
+    final String qryResult = JQ(ManagedChatModelStore.REST_END_POINT);
+    assertTrue(
+        qryResult.contains("\"name\":\"testChatModel2\"")
+            && qryResult.contains("\"name\":\"testChatModel3\"")
+            && qryResult.contains("\"name\":\"testChatModel4\""));
+
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='testChatModel2'");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[1]/name=='testChatModel3'");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[2]/name=='testChatModel4'");
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/testChatModel2");
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/testChatModel3");
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/testChatModel4");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models==[]");
+  }
+
+  @Test
+  public void loadChatModel_openAi_shouldLoadModelConfig() throws Exception {
+    loadChatModel("openai-model.json");
+
+    final String modelName = "openai-1";
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'");
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT,
+        "/models/[0]/params/baseUrl=='https://api.openai.com/v1'");
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'");
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT,
+        "/models/[0]/params/modelName=='gpt-5.4-nano'");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5");
+
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName);
+  }
+
+  @Test
+  public void loadChatModel_mistralAi_shouldLoadModelConfig() throws Exception {
+    loadChatModel("mistralai-chat-model.json");
+
+    final String modelName = "mistralai-chat-1";
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'");
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT,
+        "/models/[0]/params/baseUrl=='https://api.mistral.ai/v1'");
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-mistralAI'");
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT,
+        "/models/[0]/params/modelName=='mistral-small-latest'");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5");
+
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName);
+  }
+
+  @Test
+  public void loadChatModel_dummyUnsupportedParam_shouldRaiseError() throws Exception {
+    loadChatModel("dummy-chat-model-unsupported.json", "400");
+  }
+
+  @Test
+  public void loadChatModel_dummyAmbiguousParam_shouldDefaultToString() throws Exception {
+    loadChatModel("dummy-chat-model-ambiguous.json");
+
+    final String modelName = "dummy-chat-1";
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/ambiguous==10");
+
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName);
+  }
+}
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java
new file mode 100644
index 000000000000..654c98556ab4
--- /dev/null
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.languagemodels.documentenrichment.store.rest;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.languagemodels.TestLanguageModelBase;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestChatModelManagerPersistence extends TestLanguageModelBase {
+
+  @Before
+  public void init() throws Exception {
+    setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, true);
+  }
+
+  @After
+  public void cleanup() throws Exception {
+    afterTest();
+  }
+
+  @Test
+  public void testModelAreStoredCompact() throws Exception {
+    loadChatModel("openai-model.json");
+
+    final String jsonOnDisk = Files.readString(chatModelStoreFile, StandardCharsets.UTF_8);
+    Object objectFromDisk = Utils.fromJSONString(jsonOnDisk);
+    assertEquals(new String(Utils.toJSON(objectFromDisk, -1), UTF_8), jsonOnDisk);
+  }
+
+  @Test
+  public void testModelStorePersistence() throws Exception {
+    // check store is empty at start
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/==[]");
+
+    // load a model
+    loadChatModel("openai-model.json");
+
+    final String modelName = "openai-1";
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'");
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT,
+        "/models/[0]/params/baseUrl=='https://api.openai.com/v1'");
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'");
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5");
+
+    // check persistence after reload
+    restTestHarness.reload();
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'");
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT,
+        "/models/[0]/params/baseUrl=='https://api.openai.com/v1'");
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'");
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'");
+
+    // check persistence after restart
+    getJetty().stop();
+    getJetty().start();
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'");
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'");
+
+    // delete model and verify persistence of the empty state
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName);
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/==[]");
+
+    restTestHarness.reload();
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/==[]");
+
+    getJetty().stop();
+    getJetty().start();
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/==[]");
+  }
+}
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java
new file mode 100644
index 000000000000..0106558401a8
--- /dev/null
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.languagemodels.documentenrichment.store.rest;
+
+import org.apache.solr.languagemodels.TestLanguageModelBase;
+import org.junit.After;
+import org.junit.Test;
+
+public class TestManagedChatModelStoreInitialization extends TestLanguageModelBase {
+
+  @After
+  public void cleanUp() throws Exception {
+    afterTest();
+  }
+
+  @Test
+  public void managedChatModelStore_whenUpdateRequestProcessorConfigured_shouldBeInitialized()
+      throws Exception {
+    setupTest(
+        "solrconfig-document-enrichment-update-request-processor-only.xml",
+        "schema-language-models.xml",
+        false,
+        false);
+
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/responseHeader/status==0");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models==[]");
+  }
+
+  @Test
+  public void managedChatModelStore_whenNoComponents_shouldNotBeInitialized() throws Exception {
+    setupTest(
+        "solrconfig-language-models-no-components.xml", "schema-language-models.xml", false, false);
+    assertJQ(
+        ManagedChatModelStore.REST_END_POINT,
+        "/responseHeader/status==400",
+        "/error/msg=='No REST managed resource registered for path "
+            + ManagedChatModelStore.REST_END_POINT
+            + "'");
+  }
+}
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
new file mode 100644
index 000000000000..91d773dbeb61
--- /dev/null
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.languagemodels.documentenrichment.update.processor;
+
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.languagemodels.TestLanguageModelBase;
+import org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel;
+import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore;
+import org.apache.solr.request.SolrQueryRequestBase;
+import org.apache.solr.update.processor.UpdateRequestProcessor;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class DocumentEnrichmentUpdateProcessorFactoryTest extends TestLanguageModelBase {
+
+  @BeforeClass
+  public static void init() throws Exception {
+    setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, false);
+  }
+
+  @AfterClass
+  public static void cleanup() throws Exception {
+    afterTest();
+  }
+
+  SolrCore collection1; // obtained in setup() before each test and closed again in after()
+
+  @Before
+  public void setup() {
+    collection1 = solrTestRule.getCoreContainer().getCore("collection1");
+  }
+
+  @After
+  public void after() {
+    collection1.close();
+  }
+
+  @Test
+  public void init_fullArgs_shouldInitAllParams() {
+    NamedList args = new NamedList<>();
+    args.add("inputField", "string_field");
+    args.add("outputField", "enriched_field");
+    args.add("prompt", "Summarize: {input}");
+    args.add("model", "model1");
+
+    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
+    factory.init(args);
+
+    assertEquals("string_field", factory.getInputField());
+    assertEquals("enriched_field", factory.getOutputField());
+    assertEquals("Summarize: {input}", factory.getPrompt());
+    assertEquals("model1", factory.getModelName());
+  }
+
+  @Test
+  public void init_nullInputField_shouldThrowExceptionWithDetailedMessage() {
+    NamedList args = new NamedList<>();
+    args.add("outputField", "enriched_field");
+    args.add("prompt", "Summarize: {input}");
+    args.add("model", "model1");
+
+    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
+
+    SolrException e = assertThrows(SolrException.class, () -> factory.init(args));
+    assertEquals("Missing required parameter: inputField", e.getMessage());
+  }
+
+  @Test
+  public void init_nullOutputField_shouldThrowExceptionWithDetailedMessage() {
+    NamedList args = new NamedList<>();
+    args.add("inputField", "string_field");
+    args.add("prompt", "Summarize: {input}");
+    args.add("model", "model1");
+
+    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
+
+    SolrException e = assertThrows(SolrException.class, () -> factory.init(args));
+    assertEquals("Missing required parameter: outputField", e.getMessage());
+  }
+
+  @Test
+  public void init_nullPrompt_shouldThrowExceptionWithDetailedMessage() {
+    NamedList args = new NamedList<>();
+    args.add("inputField", "string_field");
+    args.add("outputField", "enriched_field");
+    args.add("model", "model1");
+
+    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
+
+    SolrException e = assertThrows(SolrException.class, () -> factory.init(args));
+    assertEquals("Missing required parameter: prompt", e.getMessage());
+  }
+
+  @Test
+  public void init_missingPlaceholderPrompt_shouldThrowExceptionWithDetailedMessage() {
+    NamedList args = new NamedList<>();
+    args.add("inputField", "string_field");
+    args.add("outputField", "enriched_field");
+    args.add("prompt", "Summarize:");
+    args.add("model", "model1");
+
+    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
+
+    SolrException e = assertThrows(SolrException.class, () -> factory.init(args));
+    assertEquals("prompt must contain {input} placeholder", e.getMessage());
+  }
+
+  @Test
+  public void init_nullModel_shouldThrowExceptionWithDetailedMessage() {
+    NamedList args = new NamedList<>();
+    args.add("inputField", "string_field");
+    args.add("outputField", "enriched_field");
+    args.add("prompt", "Summarize: {input}");
+
+    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
+
+    SolrException e = assertThrows(SolrException.class, () -> factory.init(args));
+    assertEquals("Missing required parameter: model", e.getMessage());
+  }
+
+  /* The tests below call getInstance(...) against the collection1 core, so they rely on the setup done by the BeforeClass/AfterClass methods */
+
+  @Test
+  public void init_notExistentOutputField_shouldThrowExceptionWithDetailedMessage() {
+    NamedList args = new NamedList<>();
+    args.add("inputField", "string_field");
+    args.add("outputField", "notExistentOutput");
+    args.add("prompt", "Summarize: {input}");
+    args.add("model", "model1");
+
+    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {};
+    factory.init(args);
+
+    SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null));
+    assertEquals("undefined field: \"notExistentOutput\"", e.getMessage());
+  }
+
+  @Test
+  public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage() {
+    // vector is a DenseVectorField — not a textual field
+    NamedList args = new NamedList<>();
+    args.add("inputField", "string_field");
+    args.add("outputField", "vector");
+    args.add("prompt", "Summarize: {input}");
+    args.add("model", "model1");
+
+    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {};
+    factory.init(args);
+
+    SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null));
+    assertEquals(
+        "only textual fields are compatible with Document Enrichment: vector", e.getMessage());
+  }
+
+  @Test
+  public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() {
+    NamedList args = new NamedList<>();
+    args.add("inputField", "notExistentInput");
+    args.add("outputField", "enriched_field");
+    args.add("prompt", "Summarize: {input}");
+    args.add("model", "model1");
+
+    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {};
+    factory.init(args);
+
+    SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null));
+    assertEquals("undefined field: \"notExistentInput\"", e.getMessage());
+  }
+
+  @Test
+  public void init_dynamicInputField_shouldNotThrowException() {
+    UpdateRequestProcessor instance =
+        createUpdateProcessor("text_s", "enriched_field", collection1, "model1");
+    assertNotNull(instance);
+  }
+
+  private UpdateRequestProcessor createUpdateProcessor(
+      String inputFieldName, String outputFieldName, SolrCore core, String modelName) {
+    NamedList args = new NamedList<>();
+
+    ManagedChatModelStore.getManagedModelStore(core)
+        .addModel(new SolrChatModel(modelName, null, null)); // register the model before getInstance
+    args.add("inputField", inputFieldName);
+    args.add("outputField", outputFieldName);
+    args.add("prompt", "Summarize: {input}");
+    args.add("model", modelName);
+
+    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    factory.init(args);
+
+    SolrQueryRequestBase req = new SolrQueryRequestBase(core, params) {};
+
+    return factory.getInstance(req, null, null);
+  }
+}
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java
new file mode 100644
index 000000000000..76d691cdebad
--- /dev/null
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.languagemodels.documentenrichment.update.processor;
+
+import java.io.IOException;
+import java.util.Map;
+import org.apache.solr.client.solrj.RemoteSolrException;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.request.SolrQuery;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.languagemodels.TestLanguageModelBase;
+import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * End-to-end tests that index documents through the document-enrichment update chains and then
+ * query the index to check whether {@code enriched_field} was populated.
+ */
+public class DocumentEnrichmentUpdateProcessorTest extends TestLanguageModelBase {
+
+  @BeforeClass
+  public static void init() throws Exception {
+    setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, false);
+  }
+
+  @AfterClass
+  public static void cleanup() throws Exception {
+    afterTest();
+  }
+
+  /** Deletes the chat models that the tests of this class load into the managed store. */
+  @After
+  public void afterEachTest() throws Exception {
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/dummy-chat-1");
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/exception-throwing-chat-model");
+  }
+
+  @Test
+  public void processAdd_inputField_shouldEnrichInputField() throws Exception {
+    loadChatModel("dummy-chat-model.json");
+
+    addWithChain(
+        sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment");
+    addWithChain(
+        sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."),
+        "documentEnrichment");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery();
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2",
+        "/response/docs/[0]/id=='99'",
+        "/response/docs/[0]/enriched_field=='enriched content'",
+        "/response/docs/[1]/id=='98'",
+        "/response/docs/[1]/enriched_field=='enriched content'");
+    // No explicit model cleanup here: afterEachTest() deletes the loaded model.
+  }
+
+  /**
+   * The chain is configured with the 'dummy-chat-1' model, but no model is loaded in this test, so
+   * the update request is expected to fail.
+   */
+  @Test
+  public void processAdd_modelNotFound_shouldThrowException() {
+    RemoteSolrException thrown =
+        assertThrows(
+            "model not found should throw an exception",
+            RemoteSolrException.class,
+            () ->
+                addWithChain(
+                    sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."),
+                    "documentEnrichment"));
+    assertTrue(
+        thrown
+            .getMessage()
+            .contains(
+                "The model configured in the Update Request Processor 'dummy-chat-1' can't be found in the store: /schema/chat-model-store"));
+  }
+
+  @Test
+  public void processAdd_emptyInputField_shouldLogAndIndexWithNoEnrichedField() throws Exception {
+    loadChatModel("dummy-chat-model.json");
+    addWithChain(sdoc("id", "99", "string_field", ""), "documentEnrichment");
+    addWithChain(
+        sdoc("id", "98", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery();
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2",
+        "/response/docs/[0]/id=='99'",
+        "!/response/docs/[0]/enriched_field==", // no enriched field for doc 99
+        "/response/docs/[1]/id=='98'",
+        "/response/docs/[1]/enriched_field=='enriched content'");
+  }
+
+  @Test
+  public void processAdd_nullInputField_shouldLogAndIndexWithNoEnrichedField() throws Exception {
+    loadChatModel("dummy-chat-model.json");
+    addWithChain(
+        sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment");
+    assertU(adoc("id", "98")); // no string_field
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery();
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2",
+        "/response/docs/[0]/id=='99'",
+        "/response/docs/[0]/enriched_field=='enriched content'",
+        "/response/docs/[1]/id=='98'",
+        "!/response/docs/[1]/enriched_field=="); // no enriched field for doc 98
+  }
+
+  @Test
+  public void processAdd_failingEnrichment_shouldLogAndIndexWithNoEnrichedField()
+      throws Exception {
+    loadChatModel("exception-throwing-chat-model.json");
+    addWithChain(
+        sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."),
+        "failingDocumentEnrichment");
+    addWithChain(
+        sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."),
+        "failingDocumentEnrichment");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery();
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2",
+        "/response/docs/[0]/id=='99'",
+        "!/response/docs/[0]/enriched_field==", // no enriched field for doc 99
+        "/response/docs/[1]/id=='98'",
+        "!/response/docs/[1]/enriched_field=="); // no enriched field for doc 98
+  }
+
+  @Test
+  public void processAtomicUpdate_shouldTriggerEnrichmentAndFetchTheStoredContent()
+      throws Exception {
+    // Verifies that when using a processor chain configured for partial updates
+    // (i.e., DistributedUpdateProcessorFactory before DocumentEnrichmentUpdateProcessorFactory),
+    // the system correctly retrieves the stored value of string_field and generates the
+    // enriched content for the document.
+    loadChatModel("dummy-chat-model.json");
+    // Both documents are indexed through the default chain, so neither is enriched yet.
+    assertU(adoc("id", "99", "string_field", "Vegeta is the saiyan prince."));
+    assertU(adoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."));
+    assertU(commit());
+
+    // The atomic update does not touch string_field; only document 99 goes through the chain.
+    SolrInputDocument atomicDoc = new SolrInputDocument();
+    atomicDoc.setField("id", "99");
+    atomicDoc.setField("enriched", Map.of("set", "true"));
+    addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery();
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2",
+        "/response/docs/[0]/id=='99'",
+        "/response/docs/[0]/enriched_field=='enriched content'",
+        "/response/docs/[1]/id=='98'",
+        "!/response/docs/[1]/enriched_field==" // no enriched field for document 98
+        );
+  }
+
+  @Test
+  public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() throws Exception {
+    // Verifies that when a document already contains an enriched_field and string_field is
+    // modified via atomic update, the enriched content is recomputed and replaces the previous
+    // value rather than being appended.
+    loadChatModel("dummy-chat-model.json");
+    addWithChain(
+        sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment");
+    addWithChain(
+        sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."),
+        "documentEnrichment");
+    assertU(commit());
+
+    SolrInputDocument atomicDoc = new SolrInputDocument();
+    atomicDoc.setField("id", "99");
+    atomicDoc.setField(
+        "string_field", Map.of("set", "Vegeta is the saiyan prince from the Dragon Ball series."));
+    addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery();
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2",
+        "/response/docs/[0]/id=='99'",
+        "/response/docs/[0]/enriched_field=='enriched content'",
+        "/response/docs/[1]/id=='98'",
+        "/response/docs/[1]/enriched_field=='enriched content'");
+  }
+
+  /** Builds a match-all query returning {@code id} and {@code enriched_field}, sorted by id desc. */
+  private SolrQuery getEnrichmentQuery() {
+    final SolrQuery query = new SolrQuery();
+    query.setQuery("*:*");
+    query.add("fl", "id,enriched_field");
+    query.add("sort", "id desc");
+    return query;
+  }
+
+  /**
+   * Sends a single document to "collection1" with the {@code update.chain} request parameter set.
+   *
+   * @param document the document to add
+   * @param updateChain value passed as the {@code update.chain} parameter
+   */
+  void addWithChain(SolrInputDocument document, String updateChain)
+      throws SolrServerException, IOException {
+    UpdateRequest req = new UpdateRequest();
+    req.add(document);
+    req.setParam("update.chain", updateChain);
+    solrTestRule.getSolrClient("collection1").request(req);
+  }
+}
From dfb27abb1d7c34a6401d3cf23121bddca0174e8e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?=
Date: Thu, 26 Mar 2026 10:01:24 +0100
Subject: [PATCH 02/17] [llm-document-enrichment] First working version
---
 .../model/DummyChatModelTest.java | 48 +++++++++++++++++++
 .../store/rest/TestChatModelManager.java | 6 +--
 2 files changed, 51 insertions(+), 3 deletions(-)
 create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModelTest.java

diff --git
a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModelTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModelTest.java
new file mode 100644
index 000000000000..6449b7b2f55c
--- /dev/null
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModelTest.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.languagemodels.documentenrichment.model;
+
+import dev.langchain4j.data.message.UserMessage;
+import dev.langchain4j.model.chat.request.ChatRequest;
+import org.apache.solr.SolrTestCase;
+import org.junit.Test;
+
+public class DummyChatModelTest extends SolrTestCase {
+
+  /** Builds a chat request that carries {@code text} as its single user message. */
+  private static ChatRequest userRequest(String text) {
+    return ChatRequest.builder().messages(UserMessage.from(text)).build();
+  }
+
+  /**
+   * A model constructed with a response text answers with that text, and a model created through
+   * the builder without arguments answers with "dummy response".
+   */
+  @Test
+  public void constructAndChat() throws Exception {
+    final String helloReply =
+        new DummyChatModel("hello world").chat(userRequest("any input")).aiMessage().text();
+    assertEquals("hello world", helloReply);
+
+    final String fixedReply =
+        new DummyChatModel("fixed response").chat(userRequest("another input")).aiMessage().text();
+    assertEquals("fixed response", fixedReply);
+
+    final String defaultReply =
+        DummyChatModel.builder().build().chat(userRequest("default")).aiMessage().text();
+    assertEquals("dummy response", defaultReply);
+  }
+}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java
index 60e97e5a6f19..dc1b67e0debb 100644
--- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java
@@ -68,11 +68,11 @@ public void testRestManagerEndpoints() throws Exception {
     final String openAiClassName = "dev.langchain4j.model.openai.OpenAiChatModel";
 
     // fails — no params provided
-    String model = "{ \"name\":\"testChatModel1\", \"class\":\"" + openAiClassName + "\"}";
-    assertJPut(ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==400");
+//    String model = "{ \"name\":\"testChatModel1\", \"class\":\"" + openAiClassName + "\"}";
+//
assertJPut(ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==400"); // success - model = + String model = "{ name:\"testChatModel2\", class:\"" + openAiClassName + "\"," From 827548a6549343e8bfe53d11715a162c0e3e05ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Thu, 26 Mar 2026 12:32:55 +0100 Subject: [PATCH 03/17] [llm-document-enrichment] Add promptFile feature to DocumentEnrichmentUpdateProcessorFactory --- ...umentEnrichmentUpdateProcessorFactory.java | 63 +++++++++++++++++-- .../conf/prompt-no-placeholder.txt | 1 + .../solr/collection1/conf/prompt.txt | 1 + ...tEnrichmentUpdateProcessorFactoryTest.java | 51 ++++++++++++++- 4 files changed, 109 insertions(+), 7 deletions(-) create mode 100644 solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-no-placeholder.txt create mode 100644 solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java index b40904f55aca..8e26971da485 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java @@ -17,6 +17,9 @@ package org.apache.solr.languagemodels.documentenrichment.update.processor; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.RequiredSolrParams; import org.apache.solr.common.params.SolrParams; @@ -47,24 +50,38 @@ * <processor 
class="solr.llm.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessorFactory"> * <str name="inputField">textualField</str> * <str name="outputField">anotherTextualField</str> + * <str name="prompt">Summarize: {input}</str> * <str name="model">ChatModel</str> * </processor> * * + *

Alternatively, the prompt can be loaded from a text file using {@code promptFile}: * - * * + *

+ * <processor class="solr.llm.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessorFactory">
+ *   <str name="inputField">textualField</str>
+ *   <str name="outputField">anotherTextualField</str>
+ *   <str name="promptFile">prompt.txt</str>
+ *   <str name="model">ChatModel</str>
+ * </processor>
+ * 
+ * + *

Exactly one of {@code prompt} or {@code promptFile} must be provided. The prompt (from either + * source) must contain the {@code {input}} placeholder. */ public class DocumentEnrichmentUpdateProcessorFactory extends UpdateRequestProcessorFactory implements SolrCoreAware, ManagedResourceObserver { private static final String INPUT_FIELD_PARAM = "inputField"; private static final String OUTPUT_FIELD_PARAM = "outputField"; private static final String PROMPT = "prompt"; + private static final String PROMPT_FILE = "promptFile"; private static final String MODEL_NAME = "model"; private ManagedChatModelStore modelStore = null; private String inputField; // TODO: change with a list of input fields (check how it's done in other UpdateProcessor that supports this behaviour) private String outputField; private String prompt; + private String promptFile; private String modelName; private SolrParams params; @@ -74,19 +91,51 @@ public void init(final NamedList args) { RequiredSolrParams required = params.required(); inputField = required.get(INPUT_FIELD_PARAM); outputField = required.get(OUTPUT_FIELD_PARAM); - prompt = required.get(PROMPT); - if (!prompt.contains("{input}")) { + modelName = required.get(MODEL_NAME); + + String inlinePrompt = params.get(PROMPT); + String promptFilePath = params.get(PROMPT_FILE); + + if (inlinePrompt == null && promptFilePath == null) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, - "prompt must contain {input} placeholder"); + "Either 'prompt' or 'promptFile' must be provided"); } - modelName = required.get(MODEL_NAME); + if (inlinePrompt != null && promptFilePath != null) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Only one of 'prompt' or 'promptFile' can be provided, not both"); + } + if (inlinePrompt != null) { + if (!inlinePrompt.contains("{input}")) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "prompt must contain {input} placeholder"); + } + this.prompt = inlinePrompt; 
+ } + this.promptFile = promptFilePath; } @Override public void inform(SolrCore core) { final SolrResourceLoader solrResourceLoader = core.getResourceLoader(); ManagedChatModelStore.registerManagedChatModelStore(solrResourceLoader, this); + if (promptFile != null) { + try (InputStream is = solrResourceLoader.openResource(promptFile)) { + prompt = new String(is.readAllBytes(), StandardCharsets.UTF_8).trim(); + } catch (IOException e) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Cannot read prompt file: " + promptFile, + e); + } + if (!prompt.contains("{input}")) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "prompt must contain {input} placeholder"); + } + } } @Override @@ -153,4 +202,8 @@ public String getPrompt() { public String getModelName() { return modelName; } + + public String getPromptFile() { + return promptFile; + } } diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-no-placeholder.txt b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-no-placeholder.txt new file mode 100644 index 000000000000..c43c5399dc07 --- /dev/null +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-no-placeholder.txt @@ -0,0 +1 @@ +Summarize this content without the placeholder. 
\ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt new file mode 100644 index 000000000000..a9e89d5bd9dc --- /dev/null +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt @@ -0,0 +1 @@ +Summarize this content: {input} \ No newline at end of file diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java index 91d773dbeb61..15fb9c37ad16 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java @@ -99,7 +99,7 @@ public void init_nullOutputField_shouldThrowExceptionWithDetailedMessage() { } @Test - public void init_nullPrompt_shouldThrowExceptionWithDetailedMessage() { + public void init_neitherPromptNorPromptFile_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "enriched_field"); @@ -108,7 +108,54 @@ public void init_nullPrompt_shouldThrowExceptionWithDetailedMessage() { DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); - assertEquals("Missing required parameter: prompt", e.getMessage()); + assertEquals("Either 'prompt' or 'promptFile' must be provided", e.getMessage()); + } + + @Test + public void 
init_bothPromptAndPromptFile_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Summarize: {input}"); + args.add("promptFile", "prompt.txt"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("Only one of 'prompt' or 'promptFile' can be provided, not both", e.getMessage()); + } + + @Test + public void init_promptFile_shouldLoadPromptFromFile() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("promptFile", "prompt.txt"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + factory.inform(collection1); + + assertEquals("prompt.txt", factory.getPromptFile()); + assertNotNull(factory.getPrompt()); + assertTrue(factory.getPrompt().contains("{input}")); + } + + @Test + public void init_promptFileWithMissingPlaceholder_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("promptFile", "prompt-no-placeholder.txt"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + + SolrException e = assertThrows(SolrException.class, () -> factory.inform(collection1)); + assertEquals("prompt must contain {input} placeholder", e.getMessage()); } @Test From cc943170231017069f4d850ae9a09f64a5e9daad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Fri, 27 Mar 2026 15:58:57 +0100 Subject: [PATCH 04/17] [llm-document-enrichment] Add 
multiple inputField support + tests --- .../DocumentEnrichmentUpdateProcessor.java | 63 ++++--- ...umentEnrichmentUpdateProcessorFactory.java | 111 ++++++++---- .../collection1/conf/prompt-multi-field.txt | 1 + .../solr/collection1/conf/prompt.txt | 2 +- .../conf/schema-language-models.xml | 1 + ...richment-update-request-processor-only.xml | 2 +- .../conf/solrconfig-document-enrichment.xml | 30 +++- ...tEnrichmentUpdateProcessorFactoryTest.java | 161 ++++++++++++++---- ...DocumentEnrichmentUpdateProcessorTest.java | 113 ++++++++++++ 9 files changed, 389 insertions(+), 95 deletions(-) create mode 100644 solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-multi-field.txt diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java index a50160924e96..5abd5629d8bb 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.util.List; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; import org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel; @@ -34,13 +35,13 @@ class DocumentEnrichmentUpdateProcessor extends UpdateRequestProcessor { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private IndexSchema schema; - private final String inputField; + private final List inputFields; private final String outputField; private final String prompt; private SolrChatModel 
chatModel; public DocumentEnrichmentUpdateProcessor( - String inputField, + List inputFields, String outputField, String prompt, SolrChatModel chatModel, @@ -48,10 +49,9 @@ public DocumentEnrichmentUpdateProcessor( UpdateRequestProcessor next) { super(next); this.schema = req.getSchema(); - // prompt must contain "{input}" where the user wants to inject the input data to populate outputField - this.prompt = prompt; - this.inputField = inputField; + this.inputFields = inputFields; this.outputField = outputField; + this.prompt = prompt; this.chatModel = chatModel; } @@ -62,28 +62,35 @@ public DocumentEnrichmentUpdateProcessor( @Override public void processAdd(AddUpdateCommand cmd) throws IOException { SolrInputDocument doc = cmd.getSolrInputDocument(); - SolrInputField inputFieldContent = doc.get(inputField); - if (!isNullOrEmpty(inputFieldContent)) { - try { - // as for now, only a plain text as prompt is sent to the model (no support for tools/skills/agents) - String toInject = inputFieldContent.getValue().toString(); - String injectedPrompt = prompt.replace("{input}", toInject); - String response = chatModel.chat(injectedPrompt); - /* TODO: check if the outputField is multivalued and adapt the code/llm call to deal with lists also, together - with structured output support - */ - doc.setField(outputField, response); - } catch (RuntimeException chatModelFailure) { - if (log.isErrorEnabled()) { - SchemaField uniqueKeyField = schema.getUniqueKeyField(); - String uniqueKeyFieldName = uniqueKeyField.getName(); - log.error( - "Could not process: {} for the document with {}: {}", - inputField, - uniqueKeyFieldName, - doc.getFieldValue(uniqueKeyFieldName), - chatModelFailure); - } + + // Collect all field values; skip enrichment if any declared field is null or empty + String injectedPrompt = prompt; + for (String fieldName : inputFields) { + SolrInputField field = doc.get(fieldName); + if (isNullOrEmpty(field)) { + super.processAdd(cmd); + return; + } + injectedPrompt = 
injectedPrompt.replace("{" + fieldName + "}", field.getValue().toString()); + } + + try { + // as for now, only a plain text as prompt is sent to the model (no support for tools/skills/agents) + String response = chatModel.chat(injectedPrompt); + /* TODO: check if the outputField is multivalued and adapt the code/llm call to deal with lists also, together + with structured output support + */ + doc.setField(outputField, response); + } catch (RuntimeException chatModelFailure) { + if (log.isErrorEnabled()) { + SchemaField uniqueKeyField = schema.getUniqueKeyField(); + String uniqueKeyFieldName = uniqueKeyField.getName(); + log.error( + "Could not process fields {} for the document with {}: {}", + inputFields, + uniqueKeyFieldName, + doc.getFieldValue(uniqueKeyFieldName), + chatModelFailure); } } super.processAdd(cmd); @@ -94,4 +101,4 @@ protected boolean isNullOrEmpty(SolrInputField inputFieldContent) { || inputFieldContent.getValue() == null || inputFieldContent.getValue().toString().isEmpty()); } -} +} \ No newline at end of file diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java index 8e26971da485..50e710838b63 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java @@ -20,6 +20,13 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; +import java.util.Collection; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; +import 
java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.RequiredSolrParams; import org.apache.solr.common.params.SolrParams; @@ -32,25 +39,28 @@ import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.rest.ManagedResource; import org.apache.solr.rest.ManagedResourceObserver; -import org.apache.solr.schema.StrField; -import org.apache.solr.schema.TextField; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.StrField; +import org.apache.solr.schema.TextField; import org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.solr.update.processor.UpdateRequestProcessorFactory; import org.apache.solr.util.plugin.SolrCoreAware; /** - * Insert in an existing field the output of the model coming from a textual field value. + * Insert in an existing field the output of the model coming from one or more textual field values. * - *

The parameters supported are: + *

One or more {@code inputField} parameters specify the Solr fields to use as input. Each field + * name must appear as a {@code {fieldName}} placeholder in the prompt. Exactly one of {@code + * prompt} or {@code promptFile} must be provided. * *

  * <processor class="solr.llm.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessorFactory">
- *   <str name="inputField">textualField</str>
- *   <str name="outputField">anotherTextualField</str>
- *   <str name="prompt">Summarize: {input}</str>
+ *   <str name="inputField">title_field</str>
+ *   <str name="inputField">body_field</str>
+ *   <str name="outputField">enriched_field</str>
+ *   <str name="prompt">Title: {title_field}. Body: {body_field}.</str>
  *   <str name="model">ChatModel</str>
  * </processor>
  * 
@@ -59,15 +69,22 @@ * *
  * <processor class="solr.llm.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessorFactory">
- *   <str name="inputField">textualField</str>
- *   <str name="outputField">anotherTextualField</str>
+ *   <str name="inputField">title_field</str>
+ *   <str name="outputField">enriched_field</str>
  *   <str name="promptFile">prompt.txt</str>
  *   <str name="model">ChatModel</str>
  * </processor>
  * 
* - *

Exactly one of {@code prompt} or {@code promptFile} must be provided. The prompt (from either - * source) must contain the {@code {input}} placeholder. + *

Validation rules: + * + *

    + *
  • At least one {@code inputField} must be declared. + *
  • Exactly one of {@code prompt} or {@code promptFile} must be provided. + *
  • Every declared {@code inputField} must have a corresponding {@code {fieldName}} placeholder + * in the prompt. + *
  • Every {@code {placeholder}} in the prompt must correspond to a declared {@code inputField}. + *
*/ public class DocumentEnrichmentUpdateProcessorFactory extends UpdateRequestProcessorFactory implements SolrCoreAware, ManagedResourceObserver { @@ -76,9 +93,11 @@ public class DocumentEnrichmentUpdateProcessorFactory extends UpdateRequestProce private static final String PROMPT = "prompt"; private static final String PROMPT_FILE = "promptFile"; private static final String MODEL_NAME = "model"; + private static final Pattern PLACEHOLDER_PATTERN = Pattern.compile("\\{([^}]+)\\}"); + private ManagedChatModelStore modelStore = null; - private String inputField; // TODO: change with a list of input fields (check how it's done in other UpdateProcessor that supports this behaviour) + private List inputFields; private String outputField; private String prompt; private String promptFile; @@ -87,9 +106,18 @@ public class DocumentEnrichmentUpdateProcessorFactory extends UpdateRequestProce @Override public void init(final NamedList args) { + // removeConfigArgs handles both multiple and + // and must be called before toSolrParams() since it mutates args in place + Collection fieldNames = args.removeConfigArgs(INPUT_FIELD_PARAM); + if (fieldNames.isEmpty()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "At least one 'inputField' must be provided"); + } + inputFields = List.copyOf(fieldNames); + params = args.toSolrParams(); RequiredSolrParams required = params.required(); - inputField = required.get(INPUT_FIELD_PARAM); outputField = required.get(OUTPUT_FIELD_PARAM); modelName = required.get(MODEL_NAME); @@ -107,11 +135,7 @@ public void init(final NamedList args) { "Only one of 'prompt' or 'promptFile' can be provided, not both"); } if (inlinePrompt != null) { - if (!inlinePrompt.contains("{input}")) { - throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, - "prompt must contain {input} placeholder"); - } + validatePromptPlaceholders(inlinePrompt, inputFields); this.prompt = inlinePrompt; } this.promptFile = promptFilePath; @@ -130,11 
+154,7 @@ public void inform(SolrCore core) { "Cannot read prompt file: " + promptFile, e); } - if (!prompt.contains("{input}")) { - throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, - "prompt must contain {input} placeholder"); - } + validatePromptPlaceholders(prompt, inputFields); } } @@ -154,16 +174,17 @@ public UpdateRequestProcessor getInstance( SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { IndexSchema latestSchema = req.getCore().getLatestSchema(); - if (!latestSchema.isDynamicField(inputField) && !latestSchema.hasExplicitField(inputField)) { - throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + inputField + "\""); + for (String fieldName : inputFields) { + if (!latestSchema.isDynamicField(fieldName) && !latestSchema.hasExplicitField(fieldName)) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + fieldName + "\""); + } } final SchemaField outputFieldSchema = latestSchema.getField(outputField); assertIsTextualField(outputFieldSchema); - ManagedChatModelStore modelStore = - ManagedChatModelStore.getManagedModelStore(req.getCore()); + ManagedChatModelStore modelStore = ManagedChatModelStore.getManagedModelStore(req.getCore()); SolrChatModel chatModel = modelStore.getModel(modelName); if (chatModel == null) { throw new SolrException( @@ -174,8 +195,10 @@ public UpdateRequestProcessor getInstance( + ManagedChatModelStore.REST_END_POINT); } - return new DocumentEnrichmentUpdateProcessor(inputField, outputField, prompt, chatModel, req, next); + return new DocumentEnrichmentUpdateProcessor( + inputFields, outputField, prompt, chatModel, req, next); } + // This is used on the outputField. Now the support is limited. Can be changed with structured outputs. 
protected void assertIsTextualField(SchemaField schemaField) { FieldType fieldType = schemaField.getType(); @@ -187,8 +210,32 @@ protected void assertIsTextualField(SchemaField schemaField) { } } - public String getInputField() { - return inputField; + private static void validatePromptPlaceholders(String prompt, List fieldNames) { + Set promptPlaceholders = new LinkedHashSet<>(); + Matcher m = PLACEHOLDER_PATTERN.matcher(prompt); + while (m.find()) { + promptPlaceholders.add(m.group(1)); + } + + Set missingInPrompt = new LinkedHashSet<>(fieldNames); + missingInPrompt.removeAll(promptPlaceholders); + if (!missingInPrompt.isEmpty()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "prompt is missing placeholders for inputField(s): " + missingInPrompt); + } + + Set unknownInPrompt = new LinkedHashSet<>(promptPlaceholders); + unknownInPrompt.removeAll(new HashSet<>(fieldNames)); + if (!unknownInPrompt.isEmpty()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "prompt contains placeholders not declared as inputField(s): " + unknownInPrompt); + } + } + + public List getInputFields() { + return inputFields; } public String getOutputField() { @@ -206,4 +253,4 @@ public String getModelName() { public String getPromptFile() { return promptFile; } -} +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-multi-field.txt b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-multi-field.txt new file mode 100644 index 000000000000..65c2f125e36c --- /dev/null +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-multi-field.txt @@ -0,0 +1 @@ +Title: {string_field}. Body: {body_field}. 
\ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt index a9e89d5bd9dc..502449a5cf5d 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt @@ -1 +1 @@ -Summarize this content: {input} \ No newline at end of file +Summarize this content: {string_field} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml index 5334762cc388..f7ad738784f6 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml @@ -36,6 +36,7 @@ + diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml index 522fbfe09267..7aa85a8b362a 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml @@ -53,7 +53,7 @@ string_field enriched_field - Summarize this content: {input} + Summarize this content: {string_field} dummy-chat-1 diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml index 02015f6296ab..25f07fea4272 100644 --- 
a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml @@ -53,7 +53,7 @@ string_field enriched_field - Summarize this content: {input} + Summarize this content: {string_field} dummy-chat-1 @@ -63,7 +63,7 @@ string_field enriched_field - Summarize this content: {input} + Summarize this content: {string_field} exception-throwing-chat-model @@ -74,10 +74,32 @@ string_field enriched_field - Summarize this content: {input} + Summarize this content: {string_field} dummy-chat-1 - + + + string_field + body_field + enriched_field + Title: {string_field}. Body: {body_field}. + dummy-chat-1 + + + + + + + string_field + body_field + enriched_field + Title: {string_field}. Body: {body_field}. + exception-throwing-chat-model + + + + + \ No newline at end of file diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java index 15fb9c37ad16..2f2a91686859 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java @@ -16,6 +16,7 @@ */ package org.apache.solr.languagemodels.documentenrichment.update.processor; +import java.util.List; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.NamedList; @@ -60,36 +61,51 @@ public void init_fullArgs_shouldInitAllParams() { NamedList args = new NamedList<>(); 
args.add("inputField", "string_field"); args.add("outputField", "enriched_field"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); factory.init(args); - assertEquals("string_field", factory.getInputField()); + assertEquals(List.of("string_field"), factory.getInputFields()); assertEquals("enriched_field", factory.getOutputField()); - assertEquals("Summarize: {input}", factory.getPrompt()); + assertEquals("Summarize: {string_field}", factory.getPrompt()); assertEquals("model1", factory.getModelName()); } @Test - public void init_nullInputField_shouldThrowExceptionWithDetailedMessage() { + public void init_multipleInputFields_shouldInitAllFields() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("inputField", "body_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Title: {string_field}. 
Body: {body_field}."); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + + assertEquals(List.of("string_field", "body_field"), factory.getInputFields()); + } + + @Test + public void init_noInputField_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("outputField", "enriched_field"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); - assertEquals("Missing required parameter: inputField", e.getMessage()); + assertEquals("At least one 'inputField' must be provided", e.getMessage()); } @Test public void init_nullOutputField_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "string_field"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); @@ -116,7 +132,7 @@ public void init_bothPromptAndPromptFile_shouldThrowExceptionWithDetailedMessage NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "enriched_field"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); args.add("promptFile", "prompt.txt"); args.add("model", "model1"); @@ -127,49 +143,48 @@ public void init_bothPromptAndPromptFile_shouldThrowExceptionWithDetailedMessage } @Test - public void init_promptFile_shouldLoadPromptFromFile() { + public void init_promptMissingPlaceholderForDeclaredField_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", 
"string_field"); args.add("outputField", "enriched_field"); - args.add("promptFile", "prompt.txt"); + args.add("prompt", "Summarize:"); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); - factory.init(args); - factory.inform(collection1); - assertEquals("prompt.txt", factory.getPromptFile()); - assertNotNull(factory.getPrompt()); - assertTrue(factory.getPrompt().contains("{input}")); + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("prompt is missing placeholders for inputField(s): [string_field]", e.getMessage()); } @Test - public void init_promptFileWithMissingPlaceholder_shouldThrowExceptionWithDetailedMessage() { + public void init_promptMissingOnePlaceholderOfMultipleFields_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "string_field"); + args.add("inputField", "body_field"); args.add("outputField", "enriched_field"); - args.add("promptFile", "prompt-no-placeholder.txt"); + args.add("prompt", "Title: {string_field}."); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); - factory.init(args); - SolrException e = assertThrows(SolrException.class, () -> factory.inform(collection1)); - assertEquals("prompt must contain {input} placeholder", e.getMessage()); + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("prompt is missing placeholders for inputField(s): [body_field]", e.getMessage()); } @Test - public void init_missingPlaceholderPrompt_shouldThrowExceptionWithDetailedMessage() { + public void init_promptHasExtraPlaceholderNotDeclaredAsInputField_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "enriched_field"); - args.add("prompt", "Summarize:"); + args.add("prompt", 
"Title: {string_field}. Extra: {unknown_field}."); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); - assertEquals("prompt must contain {input} placeholder", e.getMessage()); + assertEquals( + "prompt contains placeholders not declared as inputField(s): [unknown_field]", + e.getMessage()); } @Test @@ -177,7 +192,7 @@ public void init_nullModel_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "enriched_field"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); @@ -185,6 +200,57 @@ public void init_nullModel_shouldThrowExceptionWithDetailedMessage() { assertEquals("Missing required parameter: model", e.getMessage()); } + @Test + public void init_promptFile_shouldLoadPromptFromFile() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("promptFile", "prompt.txt"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + factory.inform(collection1); + + assertEquals("prompt.txt", factory.getPromptFile()); + assertNotNull(factory.getPrompt()); + assertTrue(factory.getPrompt().contains("{string_field}")); + } + + @Test + public void init_promptFileMultiField_shouldLoadAndValidateBothPlaceholders() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("inputField", "body_field"); + args.add("outputField", "enriched_field"); + args.add("promptFile", "prompt-multi-field.txt"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = 
new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + factory.inform(collection1); + + assertNotNull(factory.getPrompt()); + assertTrue(factory.getPrompt().contains("{string_field}")); + assertTrue(factory.getPrompt().contains("{body_field}")); + } + + @Test + public void init_promptFileWithMissingPlaceholder_shouldThrowExceptionInInform() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("promptFile", "prompt-no-placeholder.txt"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + + SolrException e = assertThrows(SolrException.class, () -> factory.inform(collection1)); + assertEquals( + "prompt is missing placeholders for inputField(s): [string_field]", e.getMessage()); + } + /* Following tests depend on a real solr schema and depend on BeforeClass-AfterClass methods */ @Test @@ -192,7 +258,7 @@ public void init_notExistentOutputField_shouldThrowExceptionWithDetailedMessage( NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "notExistentOutput"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); @@ -210,7 +276,7 @@ public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage() NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "vector"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); @@ -228,7 +294,25 @@ public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() NamedList args 
= new NamedList<>(); args.add("inputField", "notExistentInput"); args.add("outputField", "enriched_field"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {notExistentInput}"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + ModifiableSolrParams params = new ModifiableSolrParams(); + SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; + factory.init(args); + + SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null)); + assertEquals("undefined field: \"notExistentInput\"", e.getMessage()); + } + + @Test + public void init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("inputField", "notExistentInput"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Title: {string_field}. Body: {notExistentInput}."); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); @@ -243,10 +327,29 @@ public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() @Test public void init_dynamicInputField_shouldNotThrowException() { UpdateRequestProcessor instance = - createUpdateProcessor("text_s", "enriched_field", collection1, "model1"); + createUpdateProcessor("text_s", "enriched_field", collection1, "model2"); assertNotNull(instance); } + @Test + public void init_multipleDynamicInputFields_shouldNotThrowException() { + NamedList args = new NamedList<>(); + ManagedChatModelStore.getManagedModelStore(collection1) + .addModel(new SolrChatModel("model1", null, null)); + args.add("inputField", "text_s"); + args.add("inputField", "body_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Title: {text_s}. 
Body: {body_field}."); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + ModifiableSolrParams params = new ModifiableSolrParams(); + factory.init(args); + + SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; + assertNotNull(factory.getInstance(req, null, null)); + } + private UpdateRequestProcessor createUpdateProcessor( String inputFieldName, String outputFieldName, SolrCore core, String modelName) { NamedList args = new NamedList<>(); @@ -255,7 +358,7 @@ private UpdateRequestProcessor createUpdateProcessor( .addModel(new SolrChatModel(modelName, null, null)); args.add("inputField", inputFieldName); args.add("outputField", outputFieldName); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {" + inputFieldName + "}"); args.add("model", modelName); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java index 76d691cdebad..d715fb6e8e93 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java @@ -201,6 +201,119 @@ public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() th "/response/docs/[1]/enriched_field=='enriched content'"); } + // --- multi-field tests --- + + @Test + public void processAdd_multipleInputFields_allPresent_shouldEnrichDocument() throws Exception { + loadChatModel("dummy-chat-model.json"); 
+ + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", "He is very proud."), + "documentEnrichmentMultiField"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan.", "body_field", "He grew up on Earth."), + "documentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='enriched content'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field=='enriched content'"); + } + + @Test + public void processAdd_multipleInputFields_firstFieldNull_shouldSkipEnrichment() throws Exception { + loadChatModel("dummy-chat-model.json"); + + addWithChain( + sdoc("id", "99", "body_field", "He is very proud."), // string_field absent + "documentEnrichmentMultiField"); + addWithChain( + sdoc("id", "98", "body_field", "He is very jealous."), // string_field absent + "documentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + @Test + public void processAdd_multipleInputFields_secondFieldEmpty_shouldSkipEnrichment() throws Exception { + loadChatModel("dummy-chat-model.json"); + + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", ""), + "documentEnrichmentMultiField"); + addWithChain( + sdoc("id", "98", "string_field", "Goku is the best saiyan.", "body_field", ""), + "documentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", +
"!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + @Test + public void processAdd_multipleInputFields_bothFieldsAbsent_shouldSkipEnrichment() throws Exception { + loadChatModel("dummy-chat-model.json"); + + addWithChain(sdoc("id", "99"), "documentEnrichmentMultiField"); + addWithChain(sdoc("id", "98"), "documentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + @Test + public void processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichment() throws Exception { + loadChatModel("exception-throwing-chat-model.json"); + + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", "He is very proud."), + "failingDocumentEnrichmentMultiField"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan.", "body_field", "He grew up on Earth."), + "failingDocumentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + private SolrQuery getEnrichmentQuery() { final SolrQuery query = new SolrQuery(); query.setQuery("*:*"); From c723362b1c83f2f50b43b71761970dc60e9f746b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Mon, 30 Mar 2026 16:52:10 +0200 Subject: [PATCH 05/17] [llm-document-enrichment] Add support for: - multivalued outputField - outputField different from Str/Text, with numeric, boolean and date --- .../model/SolrChatModel.java | 35 +-
.../DocumentEnrichmentUpdateProcessor.java | 25 +- ...umentEnrichmentUpdateProcessorFactory.java | 95 ++++- .../dummy-chat-model-multivalued-boolean.json | 7 + .../dummy-chat-model-multivalued-date.json | 7 + .../dummy-chat-model-multivalued-double.json | 7 + .../dummy-chat-model-multivalued-float.json | 7 + .../dummy-chat-model-multivalued-int.json | 7 + .../dummy-chat-model-multivalued-long.json | 7 + .../dummy-chat-model-multivalued-string.json | 7 + .../dummy-chat-model-single-boolean.json | 7 + .../dummy-chat-model-single-date.json | 7 + .../dummy-chat-model-single-double.json | 7 + .../dummy-chat-model-single-float.json | 7 + .../dummy-chat-model-single-int.json | 7 + .../dummy-chat-model-single-long.json | 7 + .../modelChatExamples/dummy-chat-model.json | 2 +- .../conf/schema-language-models.xml | 23 +- .../conf/solrconfig-document-enrichment.xml | 130 ++++++ ...tEnrichmentUpdateProcessorFactoryTest.java | 48 ++- ...DocumentEnrichmentUpdateProcessorTest.java | 370 ++++++++++++++++-- 21 files changed, 762 insertions(+), 57 deletions(-) create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-boolean.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-date.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-double.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-float.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-int.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-long.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-string.json create mode 100644 
solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-boolean.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-date.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-double.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-float.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-int.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-long.json diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java index 9d06001e5903..1cc8edb0e742 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java @@ -18,18 +18,18 @@ import dev.langchain4j.data.message.UserMessage; import dev.langchain4j.model.chat.ChatModel; - +import dev.langchain4j.model.chat.request.ChatRequest; +import dev.langchain4j.model.chat.request.ResponseFormat; import java.lang.invoke.MethodHandles; import java.lang.reflect.Method; import java.time.Duration; import java.util.ArrayList; import java.util.Map; import java.util.Objects; -import dev.langchain4j.model.chat.request.ChatRequest; -import dev.langchain4j.model.chat.response.ChatResponse; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.RamUsageEstimator; import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.Utils; import org.apache.solr.core.SolrResourceLoader; import 
org.apache.solr.languagemodels.documentenrichment.store.ChatModelException; import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore; @@ -145,13 +145,28 @@ public SolrChatModel( this.hashCode = calculateHashCode(); } - public String chat(String text){ - ChatRequest chatRequest = ChatRequest.builder() - //.responseFormat(responseFormat) // used for structured outputs - .messages(UserMessage.from(text)) - .build(); - ChatResponse chatResponse = chatModel.chat(chatRequest); - return chatResponse.aiMessage().text(); // To change in case of structured output support + /** + * Sends a structured chat request and returns the parsed value from the {@code {"value": ...}} + * JSON object that the model is instructed to produce via {@code responseFormat}. + * + * @return the extracted value: a {@link String}, {@link Number}, {@link Boolean}, or {@link + * java.util.List} depending on the Solr output field type + */ + @SuppressWarnings("unchecked") + public Object chat(String text, ResponseFormat responseFormat) { + ChatRequest chatRequest = + ChatRequest.builder() + .responseFormat(responseFormat) + .messages(UserMessage.from(text)) + .build(); + String rawJson = chatModel.chat(chatRequest).aiMessage().text(); + Object parsed = Utils.fromJSONString(rawJson); + if (!(parsed instanceof Map map) || !map.containsKey("value")) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "LLM response is missing the 'value' key: " + rawJson); + } + return ((Map) map).get("value"); } @Override diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java index 5abd5629d8bb..57ca29e1a7dd 100644 --- 
a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java @@ -17,6 +17,7 @@ package org.apache.solr.languagemodels.documentenrichment.update.processor; +import dev.langchain4j.model.chat.request.ResponseFormat; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.List; @@ -38,13 +39,17 @@ class DocumentEnrichmentUpdateProcessor extends UpdateRequestProcessor { private final List inputFields; private final String outputField; private final String prompt; - private SolrChatModel chatModel; + private final SolrChatModel chatModel; + private final boolean multiValued; + private final ResponseFormat responseFormat; public DocumentEnrichmentUpdateProcessor( List inputFields, String outputField, String prompt, SolrChatModel chatModel, + boolean multiValued, + ResponseFormat responseFormat, SolrQueryRequest req, UpdateRequestProcessor next) { super(next); @@ -53,6 +58,8 @@ public DocumentEnrichmentUpdateProcessor( this.outputField = outputField; this.prompt = prompt; this.chatModel = chatModel; + this.multiValued = multiValued; + this.responseFormat = responseFormat; } /** @@ -76,11 +83,15 @@ public void processAdd(AddUpdateCommand cmd) throws IOException { try { // as for now, only a plain text as prompt is sent to the model (no support for tools/skills/agents) - String response = chatModel.chat(injectedPrompt); - /* TODO: check if the outputField is multivalued and adapt the code/llm call to deal with lists also, together - with structured output support - */ - doc.setField(outputField, response); + // chatModel.chat returns the parsed value from the structured JSON response + Object value = chatModel.chat(injectedPrompt, responseFormat); + if (multiValued && value instanceof List list) { + for (Object item 
: list) { + doc.addField(outputField, item); + } + } else { + doc.setField(outputField, value); + } } catch (RuntimeException chatModelFailure) { if (log.isErrorEnabled()) { SchemaField uniqueKeyField = schema.getUniqueKeyField(); @@ -101,4 +112,4 @@ protected boolean isNullOrEmpty(SolrInputField inputFieldContent) { || inputFieldContent.getValue() == null || inputFieldContent.getValue().toString().isEmpty()); } -} \ No newline at end of file +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java index 50e710838b63..508b46fb7be7 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java @@ -17,6 +17,16 @@ package org.apache.solr.languagemodels.documentenrichment.update.processor; +import dev.langchain4j.model.chat.request.ResponseFormat; +import dev.langchain4j.model.chat.request.ResponseFormatType; +import dev.langchain4j.model.chat.request.json.JsonArraySchema; +import dev.langchain4j.model.chat.request.json.JsonBooleanSchema; +import dev.langchain4j.model.chat.request.json.JsonIntegerSchema; +import dev.langchain4j.model.chat.request.json.JsonNumberSchema; +import dev.langchain4j.model.chat.request.json.JsonObjectSchema; +import dev.langchain4j.model.chat.request.json.JsonSchema; +import dev.langchain4j.model.chat.request.json.JsonSchemaElement; +import dev.langchain4j.model.chat.request.json.JsonStringSchema; import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; @@ -39,8 +49,15 @@ import 
org.apache.solr.response.SolrQueryResponse; import org.apache.solr.rest.ManagedResource; import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.schema.BoolField; +import org.apache.solr.schema.DatePointField; +import org.apache.solr.schema.DenseVectorField; +import org.apache.solr.schema.DoublePointField; import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.FloatPointField; import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.IntPointField; +import org.apache.solr.schema.LongPointField; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.StrField; import org.apache.solr.schema.TextField; @@ -182,7 +199,10 @@ public UpdateRequestProcessor getInstance( } final SchemaField outputFieldSchema = latestSchema.getField(outputField); - assertIsTextualField(outputFieldSchema); + assertIsSupportedField(outputFieldSchema); + + ResponseFormat responseFormat = buildResponseFormat(outputFieldSchema); + boolean multiValued = outputFieldSchema.multiValued(); ManagedChatModelStore modelStore = ManagedChatModelStore.getManagedModelStore(req.getCore()); SolrChatModel chatModel = modelStore.getModel(modelName); @@ -196,17 +216,74 @@ public UpdateRequestProcessor getInstance( } return new DocumentEnrichmentUpdateProcessor( - inputFields, outputField, prompt, chatModel, req, next); + inputFields, outputField, prompt, chatModel, multiValued, responseFormat, req, next); + } + + /** + * Validates that the output field type is supported. Supported types are: textual (Str, Text), + * numeric (Int, Long, Float, Double), boolean and date. Vector and binary fields are not + * supported. 
+ */ + protected void assertIsSupportedField(SchemaField schemaField) { + try { + toJsonSchemaElement(schemaField.getType()); + } catch (SolrException e) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "field type is not supported by Document Enrichment: " + schemaField.getName()); + } } - // This is used on the outputField. Now the support is limited. Can be changed with structured outputs. - protected void assertIsTextualField(SchemaField schemaField) { - FieldType fieldType = schemaField.getType(); - if (!(fieldType instanceof StrField) && !(fieldType instanceof TextField)) { + /** + * Builds a {@link ResponseFormat} that instructs the model to return a JSON object {@code + * {"value": ...}} whose value type matches the Solr field type. For multivalued fields the value + * is wrapped in a JSON array. + */ + static ResponseFormat buildResponseFormat(SchemaField schemaField) { + JsonSchemaElement valueElement = toJsonSchemaElement(schemaField.getType()); + JsonSchemaElement valueSchema = + schemaField.multiValued() + ? 
JsonArraySchema.builder().items(valueElement).build() // could be only supported by Gemini + // (source: https://github.com/langchain4j/langchain4j/blob/main/docs/docs/tutorials/structured-outputs.md) + // If not supported, we cannot support multivalued fields as outputField + : valueElement; + return ResponseFormat.builder() + .type(ResponseFormatType.JSON) + .jsonSchema( + JsonSchema.builder() + .name("output") + .rootElement( + JsonObjectSchema.builder() + .addProperty("value", valueSchema) + .required("value") + .build()) + .build()) + .build(); + } + + private static JsonSchemaElement toJsonSchemaElement(FieldType fieldType) { + // DenseVectorField extends FloatPointField, so it must be rejected before the numeric checks + if (fieldType instanceof DenseVectorField) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "field type is not supported by Document Enrichment: " + + fieldType.getClass().getSimpleName()); + } + if (fieldType instanceof StrField + || fieldType instanceof TextField + || fieldType instanceof DatePointField) { + return new JsonStringSchema(); + } else if (fieldType instanceof IntPointField || fieldType instanceof LongPointField) { + return new JsonIntegerSchema(); + } else if (fieldType instanceof FloatPointField || fieldType instanceof DoublePointField) { + return new JsonNumberSchema(); + } else if (fieldType instanceof BoolField) { + return new JsonBooleanSchema(); + } else { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, - "only textual fields are compatible with Document Enrichment: " - + schemaField.getName()); + "field type is not supported by Document Enrichment: " + + fieldType.getClass().getSimpleName()); } } @@ -253,4 +330,4 @@ public String getModelName() { public String getPromptFile() { return promptFile; } -} \ No newline at end of file +} diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-boolean.json 
b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-boolean.json new file mode 100644 index 000000000000..7ba22888cb2b --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-boolean.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-boolean-multi", + "params": { + "response": "{\"value\": [true, false]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-date.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-date.json new file mode 100644 index 000000000000..f159e3334614 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-date.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-date-multi", + "params": { + "response": "{\"value\": [\"2024-01-15T00:00:00Z\", \"2025-06-30T00:00:00Z\"]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-double.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-double.json new file mode 100644 index 000000000000..8b01495e474e --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-double.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-double-multi", + "params": { + "response": "{\"value\": [3.14, 2.71]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-float.json 
b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-float.json new file mode 100644 index 000000000000..0415048c1315 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-float.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-float-multi", + "params": { + "response": "{\"value\": [1.5, 2.5]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-int.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-int.json new file mode 100644 index 000000000000..ff15d3f0b584 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-int.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-int-multi", + "params": { + "response": "{\"value\": [1, 2]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-long.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-long.json new file mode 100644 index 000000000000..03c06eb0f5d3 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-long.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-long-multi", + "params": { + "response": "{\"value\": [10, 20, 30]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-string.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-string.json new file mode 100644 index 
000000000000..b482ef654211 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-string.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-chat-multivalued-1", + "params": { + "response": "{\"value\": [\"tag1\", \"tag2\"]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-boolean.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-boolean.json new file mode 100644 index 000000000000..caca167287a6 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-boolean.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-boolean", + "params": { + "response": "{\"value\": true}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-date.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-date.json new file mode 100644 index 000000000000..b98eb53cf506 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-date.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-date", + "params": { + "response": "{\"value\": \"2024-01-15T00:00:00Z\"}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-double.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-double.json new file mode 100644 index 000000000000..5301937628f7 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-double.json @@ -0,0 +1,7 @@ 
+{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-double", + "params": { + "response": "{\"value\": 2.5}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-float.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-float.json new file mode 100644 index 000000000000..8f0c63512a35 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-float.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-float", + "params": { + "response": "{\"value\": 1.5}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-int.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-int.json new file mode 100644 index 000000000000..664d846e1260 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-int.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-int", + "params": { + "response": "{\"value\": 7}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-long.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-long.json new file mode 100644 index 000000000000..6d58cab102fa --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-long.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-long", + "params": { + "response": "{\"value\": 42}" + } +} \ No newline at end of file diff --git 
a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json index f331535d5e9f..169cbc710450 100644 --- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json @@ -2,6 +2,6 @@ "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", "name": "dummy-chat-1", "params": { - "response": "enriched content" + "response": "{\"value\": \"enriched content\"}" } } \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml index f7ad738784f6..a7d329e1a88f 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml @@ -25,7 +25,11 @@ - + + + + + @@ -38,6 +42,23 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml index 25f07fea4272..f9b82c153d9e 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml @@ -91,6 +91,16 @@ + + + string_field + enriched_field_multi + Extract tags from: {string_field} + dummy-chat-multivalued-1 + + + + string_field @@ -102,4 +112,124 @@ + + + string_field + output_long + Extract a number from: {string_field} + dummy-long + + + + + + + string_field + output_int + Extract a number from: {string_field} + dummy-int + + + + + + + 
string_field + output_float + Extract a number from: {string_field} + dummy-float + + + + + + + string_field + output_double + Extract a number from: {string_field} + dummy-double + + + + + + + string_field + output_boolean + Is this true or false: {string_field} + dummy-boolean + + + + + + + string_field + output_date + Extract a date from: {string_field} + dummy-date + + + + + + + string_field + output_long_multi + Extract numbers from: {string_field} + dummy-long-multi + + + + + + + string_field + output_int_multi + Extract numbers from: {string_field} + dummy-int-multi + + + + + + + string_field + output_float_multi + Extract numbers from: {string_field} + dummy-float-multi + + + + + + + string_field + output_double_multi + Extract numbers from: {string_field} + dummy-double-multi + + + + + + + string_field + output_boolean_multi + Extract boolean values from: {string_field} + dummy-boolean-multi + + + + + + + string_field + output_date_multi + Extract dates from: {string_field} + dummy-date-multi + + + + \ No newline at end of file diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java index 2f2a91686859..b2ba1cf0a401 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java @@ -286,7 +286,7 @@ public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage() SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null)); assertEquals( - "only textual fields are compatible with 
Document Enrichment: vector", e.getMessage()); + "field type is not supported by Document Enrichment: vector", e.getMessage()); } @Test @@ -324,6 +324,52 @@ public void init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDeta assertEquals("undefined field: \"notExistentInput\"", e.getMessage()); } + @Test + public void init_multivaluedStringOutputField_shouldNotThrowException() { + UpdateRequestProcessor instance = + createUpdateProcessor("string_field", "enriched_field_multi", collection1, "model-mv"); + assertNotNull(instance); + } + + @Test + public void init_multivaluedStringOutputField_buildResponseFormat_shouldProduceArraySchema() { + NamedList args = new NamedList<>(); + ManagedChatModelStore.getManagedModelStore(collection1) + .addModel(new SolrChatModel("model-rf", null, null)); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field_multi"); + args.add("prompt", "Summarize: {string_field}"); + args.add("model", "model-rf"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + ModifiableSolrParams params = new ModifiableSolrParams(); + SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; + assertNotNull(factory.getInstance(req, null, null)); + + // verify the ResponseFormat is constructed correctly for the multivalued field + var schema = collection1.getLatestSchema(); + var schemaField = schema.getField("enriched_field_multi"); + assertTrue(schemaField.multiValued()); + var responseFormat = DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(schemaField); + assertNotNull(responseFormat); + assertEquals( + dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); + assertNotNull(responseFormat.jsonSchema()); + } + + @Test + public void init_singleValuedStringOutputField_buildResponseFormat_shouldProduceStringSchema() { + var schema = collection1.getLatestSchema(); + var schemaField = 
schema.getField("enriched_field"); + assertFalse(schemaField.multiValued()); + var responseFormat = DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(schemaField); + assertNotNull(responseFormat); + assertEquals( + dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); + assertNotNull(responseFormat.jsonSchema()); + } + @Test public void init_dynamicInputField_shouldNotThrowException() { UpdateRequestProcessor instance = diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java index d715fb6e8e93..5349de833df3 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java @@ -42,21 +42,30 @@ public static void cleanup() throws Exception { afterTest(); } + private String loadedModelId; + @After public void afterEachTest() throws Exception { - restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/dummy-chat-1"); - restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/exception-throwing-chat-model"); + if (loadedModelId != null) { + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + loadedModelId); + loadedModelId = null; + } + } + + private void loadTestChatModel(String fileName, String modelId) throws Exception { + loadChatModel(fileName); + loadedModelId = modelId; } @Test public void processAdd_inputField_shouldEnrichInputField() throws Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain(sdoc("id", "99", 
"string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -65,8 +74,6 @@ public void processAdd_inputField_shouldEnrichInputField() throws Exception { "/response/docs/[0]/enriched_field=='enriched content'", "/response/docs/[1]/id=='98'", "/response/docs/[1]/enriched_field=='enriched content'"); - - restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/dummy-1"); // clean up } /* @@ -92,12 +99,12 @@ public void processAdd_modelNotFound_shouldThrowException() { @Test public void processAdd_emptyInputField_shouldLogAndIndexWithNoEnrichedField() throws Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain(sdoc("id", "99", "string_field", ""), "documentEnrichment"); addWithChain(sdoc("id", "98", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -110,12 +117,12 @@ public void processAdd_emptyInputField_shouldLogAndIndexWithNoEnrichedField() th @Test public void processAdd_nullInputField_shouldLogAndIndexWithNoEnrichedField() throws Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); assertU(adoc("id", "98")); // no string_field assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -128,12 +135,12 @@ 
public void processAdd_nullInputField_shouldLogAndIndexWithNoEnrichedField() thr @Test public void processAdd_failingEnrichment_shouldLogAndIndexWithNoEnrichedField() throws Exception { - loadChatModel("exception-throwing-chat-model.json"); + loadTestChatModel("exception-throwing-chat-model.json", "exception-throwing-chat-model"); addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "failingDocumentEnrichment"); addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "failingDocumentEnrichment"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -151,7 +158,7 @@ public void processAtomicUpdate_shouldTriggerEnrichmentAndFetchTheStoredContent( // (i.e., DistributedUpdateProcessorFactory before DocumentEnrichmentUpdateProcessorFactory), // the system correctly retrieves the stored value of string_field and generates the // enriched content for the document. 
- loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); assertU(adoc("id", "99", "string_field", "Vegeta is the saiyan prince.")); assertU(adoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth.")); assertU(commit()); @@ -162,7 +169,7 @@ public void processAtomicUpdate_shouldTriggerEnrichmentAndFetchTheStoredContent( addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -179,7 +186,7 @@ public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() th // Verifies that when a document already contains an enriched_field and string_field is // modified via atomic update, the enriched content is recomputed and replaces the previous // value rather than being appended. - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); assertU(commit()); @@ -190,7 +197,7 @@ public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() th addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -205,7 +212,7 @@ public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() th @Test public void processAdd_multipleInputFields_allPresent_shouldEnrichDocument() throws Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain( sdoc("id", "99", "string_field", "Vegeta is 
the saiyan prince.", "body_field", "He is very proud."), @@ -215,7 +222,7 @@ public void processAdd_multipleInputFields_allPresent_shouldEnrichDocument() thr "documentEnrichmentMultiField"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -228,7 +235,7 @@ public void processAdd_multipleInputFields_allPresent_shouldEnrichDocument() thr @Test public void processAdd_multipleInputFields_firstFieldNull_shouldSkipEnrichment() throws Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain( sdoc("id", "99", "body_field", "He is very proud."), // string_field absent @@ -238,7 +245,7 @@ public void processAdd_multipleInputFields_firstFieldNull_shouldSkipEnrichment() "documentEnrichmentMultiField"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -251,7 +258,7 @@ public void processAdd_multipleInputFields_firstFieldNull_shouldSkipEnrichment() @Test public void processAdd_multipleInputFields_secondFieldEmpty_shouldSkipEnrichment() throws Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain( sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", ""), @@ -261,7 +268,7 @@ public void processAdd_multipleInputFields_secondFieldEmpty_shouldSkipEnrichment "documentEnrichmentMultiField"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -274,13 +281,13 @@ public void processAdd_multipleInputFields_secondFieldEmpty_shouldSkipEnrichment @Test public void processAdd_multipleInputFields_bothFieldsAbsent_shouldSkipEnrichment() throws 
Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain(sdoc("id", "99"), "documentEnrichmentMultiField"); addWithChain(sdoc("id", "98"), "documentEnrichmentMultiField"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -293,7 +300,7 @@ public void processAdd_multipleInputFields_bothFieldsAbsent_shouldSkipEnrichment @Test public void processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichment() throws Exception { - loadChatModel("exception-throwing-chat-model.json"); + loadTestChatModel("exception-throwing-chat-model.json", "exception-throwing-chat-model"); addWithChain( sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", "He is very proud."), @@ -303,7 +310,7 @@ public void processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichme "failingDocumentEnrichmentMultiField"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -314,10 +321,317 @@ public void processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichme "!/response/docs/[1]/enriched_field=="); } - private SolrQuery getEnrichmentQuery() { + @Test + public void processAdd_multivaluedStringOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-string.json", "dummy-chat-multivalued-1"); + + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), + "documentEnrichmentMultivaluedString"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "documentEnrichmentMultivaluedString"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field_multi"); + + assertJQ( + "/query" + 
query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field_multi/[0]=='tag1'", + "/response/docs/[0]/enriched_field_multi/[1]=='tag2'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field_multi/[0]=='tag1'", + "/response/docs/[1]/enriched_field_multi/[1]=='tag2'"); + } + + @Test + public void processAdd_multivaluedStringOutputField_emptyInput_shouldSkipEnrichment() + throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-string.json", "dummy-chat-multivalued-1"); + + addWithChain(sdoc("id", "99", "string_field", ""), "documentEnrichmentMultivaluedString"); + addWithChain(sdoc("id", "98", "string_field", ""), "documentEnrichmentMultivaluedString"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field_multi==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field_multi=="); + } + + // --- typed single-valued output field tests --- + + @Test + public void processAdd_singleLongOutputField_shouldPopulateValue() throws Exception { + loadTestChatModel("dummy-chat-model-single-long.json", "dummy-long"); + + addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleLong"); + addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleLong"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_long"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_long==42", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_long==42"); + } + + @Test + public void processAdd_singleIntOutputField_shouldPopulateValue() throws Exception { + loadTestChatModel("dummy-chat-model-single-int.json", "dummy-int"); + + 
addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleInt"); + addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleInt"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_int"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_int==7", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_int==7"); + } + + @Test + public void processAdd_singleFloatOutputField_shouldPopulateValue() throws Exception { + loadTestChatModel("dummy-chat-model-single-float.json", "dummy-float"); + + addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleFloat"); + addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleFloat"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_float"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_float==1.5", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_float==1.5"); + } + + @Test + public void processAdd_singleDoubleOutputField_shouldPopulateValue() throws Exception { + loadTestChatModel("dummy-chat-model-single-double.json", "dummy-double"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleDouble"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleDouble"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_double"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_double==2.5", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_double==2.5"); + } + + @Test + public void processAdd_singleBooleanOutputField_shouldPopulateValue() throws Exception { 
+ loadTestChatModel("dummy-chat-model-single-boolean.json", "dummy-boolean"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleBoolean"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleBoolean"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_boolean"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_boolean==true", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_boolean==true"); + } + + @Test + public void processAdd_singleDateOutputField_shouldPopulateValue() throws Exception { + loadTestChatModel("dummy-chat-model-single-date.json", "dummy-date"); + + addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleDate"); + addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleDate"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_date"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_date=='2024-01-15T00:00:00Z'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_date=='2024-01-15T00:00:00Z'"); + } + + // --- typed multivalued output field tests --- + + @Test + public void processAdd_multivaluedLongOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-long.json", "dummy-long-multi"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedLong"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedLong"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_long_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + 
"/response/docs/[0]/output_long_multi/[0]==10", + "/response/docs/[0]/output_long_multi/[1]==20", + "/response/docs/[0]/output_long_multi/[2]==30", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_long_multi/[0]==10", + "/response/docs/[1]/output_long_multi/[1]==20", + "/response/docs/[1]/output_long_multi/[2]==30"); + } + + @Test + public void processAdd_multivaluedIntOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-int.json", "dummy-int-multi"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedInt"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedInt"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_int_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_int_multi/[0]==1", + "/response/docs/[0]/output_int_multi/[1]==2", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_int_multi/[0]==1", + "/response/docs/[1]/output_int_multi/[1]==2"); + } + + @Test + public void processAdd_multivaluedFloatOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-float.json", "dummy-float-multi"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedFloat"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedFloat"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_float_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_float_multi/[0]==1.5", + "/response/docs/[0]/output_float_multi/[1]==2.5", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_float_multi/[0]==1.5", + 
"/response/docs/[1]/output_float_multi/[1]==2.5"); + } + + @Test + public void processAdd_multivaluedDoubleOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-double.json", "dummy-double-multi"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedDouble"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedDouble"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_double_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_double_multi/[0]==3.14", + "/response/docs/[0]/output_double_multi/[1]==2.71", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_double_multi/[0]==3.14", + "/response/docs/[1]/output_double_multi/[1]==2.71"); + } + + @Test + public void processAdd_multivaluedBooleanOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-boolean.json", "dummy-boolean-multi"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedBoolean"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedBoolean"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_boolean_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_boolean_multi/[0]==true", + "/response/docs/[0]/output_boolean_multi/[1]==false", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_boolean_multi/[0]==true", + "/response/docs/[1]/output_boolean_multi/[1]==false"); + } + + @Test + public void processAdd_multivaluedDateOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-date.json", 
"dummy-date-multi"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedDate"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedDate"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_date_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_date_multi/[0]=='2024-01-15T00:00:00Z'", + "/response/docs/[0]/output_date_multi/[1]=='2025-06-30T00:00:00Z'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_date_multi/[0]=='2024-01-15T00:00:00Z'", + "/response/docs/[1]/output_date_multi/[1]=='2025-06-30T00:00:00Z'"); + } + + private SolrQuery getEnrichmentQuery(String enrichedFieldName) { final SolrQuery query = new SolrQuery(); query.setQuery("*:*"); - query.add("fl", "id,enriched_field"); + query.add("fl", "id,"+enrichedFieldName); query.add("sort", "id desc"); return query; } From cf0d6bb20411490a35691b22decf638a4315da61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Tue, 31 Mar 2026 14:52:48 +0200 Subject: [PATCH 06/17] [llm-document-enrichment] polished code, added tests and added file for documentation --- .../unreleased/llm-document-enrichment.yml | 4 ++ .../model/SolrChatModel.java | 7 +- .../DocumentEnrichmentUpdateProcessor.java | 2 +- ...umentEnrichmentUpdateProcessorFactory.java | 26 ++----- .../dummy-chat-model-malformed-json.json | 7 ++ .../dummy-chat-model-missing-value-key.json | 7 ++ .../dummy-chat-model-multivalued-scalar.json | 7 ++ .../exception-throwing-chat-model.json | 2 +- .../model/DummyChatModel.java | 2 +- .../store/rest/TestChatModelManager.java | 6 +- ...tEnrichmentUpdateProcessorFactoryTest.java | 14 ++-- ...DocumentEnrichmentUpdateProcessorTest.java | 72 +++++++++++++++++++ .../modules/indexing-guide/indexing-nav.adoc | 1 + .../pages/document-enrichment-with-llms.adoc | 19 +++++ 14 files 
changed, 139 insertions(+), 37 deletions(-) create mode 100644 changelog/unreleased/llm-document-enrichment.yml create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-malformed-json.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-missing-value-key.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-scalar.json create mode 100644 solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc diff --git a/changelog/unreleased/llm-document-enrichment.yml b/changelog/unreleased/llm-document-enrichment.yml new file mode 100644 index 000000000000..fd6e55d6249f --- /dev/null +++ b/changelog/unreleased/llm-document-enrichment.yml @@ -0,0 +1,4 @@ +title: Add DocumentEnrichmentUpdateProcessorFactory for LLM-based document enrichment at index time +type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: +- name: Nicolò Rinaldi diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java index 1cc8edb0e742..afd45d11ca07 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java @@ -48,9 +48,9 @@ public class SolrChatModel implements Accountable { // timeout is type Duration private static final String TIMEOUT_PARAM = "timeout"; - // the following are Integer type + // the following are Integer type private static final String MAX_RETRIES_PARAM = "maxRetries"; - private static final String THINKING_BUDGET_TOKENS ="thinkingBudgetTokens"; + private static final String THINKING_BUDGET_TOKENS = 
"thinkingBudgetTokens"; private static final String RANDOM_SEED = "randomSeed"; private final String name; @@ -152,7 +152,6 @@ public SolrChatModel( * @return the extracted value: a {@link String}, {@link Number}, {@link Boolean}, or {@link * java.util.List} depending on the Solr output field type */ - @SuppressWarnings("unchecked") public Object chat(String text, ResponseFormat responseFormat) { ChatRequest chatRequest = ChatRequest.builder() @@ -166,7 +165,7 @@ public Object chat(String text, ResponseFormat responseFormat) { SolrException.ErrorCode.SERVER_ERROR, "LLM response is missing the 'value' key: " + rawJson); } - return ((Map) map).get("value"); + return map.get("value"); } @Override diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java index 57ca29e1a7dd..3f90fd8eb580 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java @@ -35,7 +35,7 @@ class DocumentEnrichmentUpdateProcessor extends UpdateRequestProcessor { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private IndexSchema schema; + private final IndexSchema schema; private final List inputFields; private final String outputField; private final String prompt; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java 
b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java index 508b46fb7be7..659a20897eb4 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java @@ -199,7 +199,6 @@ public UpdateRequestProcessor getInstance( } final SchemaField outputFieldSchema = latestSchema.getField(outputField); - assertIsSupportedField(outputFieldSchema); ResponseFormat responseFormat = buildResponseFormat(outputFieldSchema); boolean multiValued = outputFieldSchema.multiValued(); @@ -219,33 +218,20 @@ public UpdateRequestProcessor getInstance( inputFields, outputField, prompt, chatModel, multiValued, responseFormat, req, next); } - /** - * Validates that the output field type is supported. Supported types are: textual (Str, Text), - * numeric (Int, Long, Float, Double), boolean and date. Vector and binary fields are not - * supported. - */ - protected void assertIsSupportedField(SchemaField schemaField) { - try { - toJsonSchemaElement(schemaField.getType()); - } catch (SolrException e) { - throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, - "field type is not supported by Document Enrichment: " + schemaField.getName()); - } - } - /** * Builds a {@link ResponseFormat} that instructs the model to return a JSON object {@code * {"value": ...}} whose value type matches the Solr field type. For multivalued fields the value - * is wrapped in a JSON array. + * is wrapped in a {@link JsonArraySchema} nested inside the root {@link JsonObjectSchema}. + * + *

Nesting {@link JsonArraySchema} inside a {@link JsonObjectSchema} property is supported by + * all langchain4j providers that implement structured outputs with {@link JsonObjectSchema} (OpenAI, Azure OpenAI, + * Google AI, Gemini, Mistral, Ollama, Amazon Bedrock, Watsonx). */ static ResponseFormat buildResponseFormat(SchemaField schemaField) { JsonSchemaElement valueElement = toJsonSchemaElement(schemaField.getType()); JsonSchemaElement valueSchema = schemaField.multiValued() - ? JsonArraySchema.builder().items(valueElement).build() // could be only supported by Gemini - // (source: https://github.com/langchain4j/langchain4j/blob/main/docs/docs/tutorials/structured-outputs.md) - // If not supported, we cannot support multivalued fields as outputField + ? JsonArraySchema.builder().items(valueElement).build() : valueElement; return ResponseFormat.builder() .type(ResponseFormatType.JSON) diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-malformed-json.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-malformed-json.json new file mode 100644 index 000000000000..bdc8394add3b --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-malformed-json.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-chat-1", + "params": { + "response": "not valid json at all" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-missing-value-key.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-missing-value-key.json new file mode 100644 index 000000000000..42a52faf650a --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-missing-value-key.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", 
+ "name": "dummy-chat-1", + "params": { + "response": "{\"result\": \"some value\"}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-scalar.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-scalar.json new file mode 100644 index 000000000000..2deb27259554 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-scalar.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-chat-multivalued-1", + "params": { + "response": "{\"value\": \"a single string\"}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json index 29bcce318ada..3fad70744ff5 100644 --- a/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json +++ b/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json @@ -3,4 +3,4 @@ "name": "exception-throwing-chat-model", "params": { } -} \ No newline at end of file +} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java index 753150cb6f02..42987b1d69ce 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java @@ -77,4 +77,4 @@ public DummyChatModel build() { return new DummyChatModel(this.response); } } -} \ No newline at end of file +} diff --git 
a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java index dc1b67e0debb..49c1b70ce2e0 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java @@ -67,10 +67,6 @@ public void testRestManagerEndpoints() throws Exception { final String openAiClassName = "dev.langchain4j.model.openai.OpenAiChatModel"; - // fails — no params provided -// String model = "{ \"name\":\"testChatModel1\", \"class\":\"" + openAiClassName + "\"}"; -// assertJPut(ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==400"); - // success String model = "{ name:\"testChatModel2\", class:\"" @@ -119,7 +115,7 @@ public void testRestManagerEndpoints() throws Exception { restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/testChatModel2"); restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/testChatModel3"); restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/testChatModel4"); - assertJQ(ManagedChatModelStore.REST_END_POINT, "/models==[]'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models==[]"); } @Test diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java index b2ba1cf0a401..e92bded3c75e 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java +++ 
b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java @@ -286,7 +286,7 @@ public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage() SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null)); assertEquals( - "field type is not supported by Document Enrichment: vector", e.getMessage()); + "field type is not supported by Document Enrichment: DenseVectorField", e.getMessage()); } @Test @@ -325,14 +325,15 @@ public void init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDeta } @Test - public void init_multivaluedStringOutputField_shouldNotThrowException() { + public void init_multivaluedStringOutputField_shouldNotThrowException() throws Exception { UpdateRequestProcessor instance = createUpdateProcessor("string_field", "enriched_field_multi", collection1, "model-mv"); assertNotNull(instance); + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model-mv"); } @Test - public void init_multivaluedStringOutputField_buildResponseFormat_shouldProduceArraySchema() { + public void init_multivaluedStringOutputField_buildResponseFormat_shouldProduceArraySchema() throws Exception { NamedList args = new NamedList<>(); ManagedChatModelStore.getManagedModelStore(collection1) .addModel(new SolrChatModel("model-rf", null, null)); @@ -356,6 +357,7 @@ public void init_multivaluedStringOutputField_buildResponseFormat_shouldProduceA assertEquals( dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); assertNotNull(responseFormat.jsonSchema()); + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model-rf"); } @Test @@ -371,14 +373,15 @@ public void init_singleValuedStringOutputField_buildResponseFormat_shouldProduce } @Test - public void init_dynamicInputField_shouldNotThrowException() { + public void init_dynamicInputField_shouldNotThrowException() throws 
Exception{ UpdateRequestProcessor instance = createUpdateProcessor("text_s", "enriched_field", collection1, "model2"); assertNotNull(instance); + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model2"); } @Test - public void init_multipleDynamicInputFields_shouldNotThrowException() { + public void init_multipleDynamicInputFields_shouldNotThrowException() throws Exception{ NamedList args = new NamedList<>(); ManagedChatModelStore.getManagedModelStore(collection1) .addModel(new SolrChatModel("model1", null, null)); @@ -394,6 +397,7 @@ public void init_multipleDynamicInputFields_shouldNotThrowException() { SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; assertNotNull(factory.getInstance(req, null, null)); + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1"); } private UpdateRequestProcessor createUpdateProcessor( diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java index 5349de833df3..e88c8e549a33 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java @@ -628,6 +628,78 @@ public void processAdd_multivaluedDateOutputField_shouldPopulateAllValues() thro "/response/docs/[1]/output_date_multi/[1]=='2025-06-30T00:00:00Z'"); } + // --- LLM response contract violation tests --- + + @Test + public void processAdd_llmResponseMissingValueKey_shouldLogAndIndexWithNoEnrichedField() + throws Exception { + // Model returns valid JSON but without the required "value" key + 
loadTestChatModel("dummy-chat-model-missing-value-key.json", "dummy-chat-1"); + + addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + @Test + public void processAdd_llmResponseMalformedJson_shouldLogAndIndexWithNoEnrichedField() + throws Exception { + // Model returns a plain string that cannot be parsed as JSON + loadTestChatModel("dummy-chat-model-malformed-json.json", "dummy-chat-1"); + + addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + // --- multivalued output field / scalar response test --- + + @Test + public void processAdd_multivaluedOutputField_scalarLlmResponse_shouldStoreSingleValue() + throws Exception { + // Model returns {"value": "a single string"} for a multivalued output field. + // The scalar falls through the List check and is stored as a single-element value. 
+ loadTestChatModel("dummy-chat-model-multivalued-scalar.json", "dummy-chat-multivalued-1"); + + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), + "documentEnrichmentMultivaluedString"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "documentEnrichmentMultivaluedString"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field_multi/[0]=='a single string'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field_multi/[0]=='a single string'"); + } + private SolrQuery getEnrichmentQuery(String enrichedFieldName) { final SolrQuery query = new SolrQuery(); query.setQuery("*:*"); diff --git a/solr/solr-ref-guide/modules/indexing-guide/indexing-nav.adoc b/solr/solr-ref-guide/modules/indexing-guide/indexing-nav.adoc index 9b50849716c3..940225e8d4ef 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/indexing-nav.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/indexing-nav.adoc @@ -58,5 +58,6 @@ ** xref:partial-document-updates.adoc[] ** xref:reindexing.adoc[] ** xref:language-detection.adoc[] +** xref:document-enrichment-with-llms.adoc[] ** xref:de-duplication.adoc[] ** xref:content-streams.adoc[] diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc new file mode 100644 index 000000000000..4207a892e274 --- /dev/null +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc @@ -0,0 +1,19 @@ += Document Enrichment with LLMs +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +This module brings the power of *Language Models* to Solr. From 570c2aaf4cf1f0382ee8ecb3cbcab77249f8996a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Wed, 1 Apr 2026 11:11:54 +0200 Subject: [PATCH 07/17] [llm-document-enrichment] updated supported models + added tests --- gradle/libs.versions.toml | 3 + solr/modules/language-models/build.gradle | 3 + solr/modules/language-models/gradle.lockfile | 3 + .../anthropic-chat-model.json | 13 ++++ .../modelChatExamples/gemini-chat-model.json | 12 ++++ .../modelChatExamples/ollama-chat-model.json | 11 ++++ .../store/rest/TestChatModelManager.java | 59 +++++++++++++++++++ ...DocumentEnrichmentUpdateProcessorTest.java | 2 +- 8 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/anthropic-chat-model.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/gemini-chat-model.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/ollama-chat-model.json diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 8c854fb41b01..eadc0e41ce3f 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -450,11 +450,14 @@ ktor-client-serialization-json = { 
module = "io.ktor:ktor-serialization-kotlinx- ktor-server-cio = { module = "io.ktor:ktor-server-cio", version.ref = "ktor" } ktor-server-core = { module = "io.ktor:ktor-server-core", version.ref = "ktor" } ktor-server-htmlBuilder = { module = "io.ktor:ktor-server-html-builder", version.ref = "ktor" } +langchain4j-anthropic = { module = "dev.langchain4j:langchain4j-anthropic" } langchain4j-bom = { module = "dev.langchain4j:langchain4j-bom", version.ref = "langchain4j-bom" } langchain4j-cohere = { module = "dev.langchain4j:langchain4j-cohere" } langchain4j-core = { module = "dev.langchain4j:langchain4j-core" } +langchain4j-google-ai-gemini = { module = "dev.langchain4j:langchain4j-google-ai-gemini" } langchain4j-hugging-face = { module = "dev.langchain4j:langchain4j-hugging-face" } langchain4j-mistral-ai = { module = "dev.langchain4j:langchain4j-mistral-ai" } +langchain4j-ollama = { module = "dev.langchain4j:langchain4j-ollama" } langchain4j-open-ai = { module = "dev.langchain4j:langchain4j-open-ai" } lmax-disruptor = { module = "com.lmax:disruptor", version.ref = "lmax-disruptor" } locationtech-spatial4j = { module = "org.locationtech.spatial4j:spatial4j", version.ref = "spatial4j" } diff --git a/solr/modules/language-models/build.gradle b/solr/modules/language-models/build.gradle index a4dc82fc15cb..17d9716cfd20 100644 --- a/solr/modules/language-models/build.gradle +++ b/solr/modules/language-models/build.gradle @@ -29,9 +29,12 @@ dependencies { implementation libs.apache.lucene.core implementation libs.langchain4j.core + runtimeOnly libs.langchain4j.anthropic runtimeOnly libs.langchain4j.cohere + runtimeOnly libs.langchain4j.google.ai.gemini runtimeOnly libs.langchain4j.hugging.face runtimeOnly libs.langchain4j.mistral.ai + runtimeOnly libs.langchain4j.ollama runtimeOnly libs.langchain4j.open.ai implementation libs.slf4j.api diff --git a/solr/modules/language-models/gradle.lockfile b/solr/modules/language-models/gradle.lockfile index 27221b30a3e7..1427966a1ddc 
100644 --- a/solr/modules/language-models/gradle.lockfile +++ b/solr/modules/language-models/gradle.lockfile @@ -40,13 +40,16 @@ com.tdunning:t-digest:3.3=jarValidation,runtimeClasspath,runtimeLibs,solrPlatfor commons-cli:commons-cli:1.10.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath commons-codec:commons-codec:1.19.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath commons-io:commons-io:2.20.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +dev.langchain4j:langchain4j-anthropic:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-bom:1.9.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath dev.langchain4j:langchain4j-cohere:1.9.1-beta17=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-core:1.9.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +dev.langchain4j:langchain4j-google-ai-gemini:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-http-client-jdk:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-http-client:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-hugging-face:1.9.1-beta17=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-mistral-ai:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath +dev.langchain4j:langchain4j-ollama:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-open-ai:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath io.dropwizard.metrics:metrics-annotation:4.2.26=jarValidation,testRuntimeClasspath 
io.dropwizard.metrics:metrics-core:4.2.26=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/anthropic-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/anthropic-chat-model.json new file mode 100644 index 000000000000..c4bd85ada4bb --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/anthropic-chat-model.json @@ -0,0 +1,13 @@ +{ + "class": "dev.langchain4j.model.anthropic.AnthropicChatModel", + "name": "anthropic-chat-1", + "params": { + "baseUrl": "https://api.anthropic.com/v1", + "apiKey": "apiKey-anthropic", + "modelName": "claude-3-5-haiku-latest", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/gemini-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/gemini-chat-model.json new file mode 100644 index 000000000000..0ac0a612daa2 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/gemini-chat-model.json @@ -0,0 +1,12 @@ +{ + "class": "dev.langchain4j.model.googleai.GoogleAiGeminiChatModel", + "name": "gemini-chat-1", + "params": { + "apiKey": "apiKey-gemini", + "modelName": "gemini-2.0-flash", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/ollama-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/ollama-chat-model.json new file mode 100644 index 000000000000..411a6468452b --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/ollama-chat-model.json @@ -0,0 +1,11 @@ +{ + "class": "dev.langchain4j.model.ollama.OllamaChatModel", + "name": "ollama-chat-1", + "params": { + "baseUrl": "http://localhost:11434", 
+ "modelName": "llama3.2", + "timeout": 60, + "logRequests": true, + "logResponses": true + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java index 49c1b70ce2e0..25880eecbcd6 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java @@ -162,6 +162,65 @@ public void loadChatModel_mistralAi_shouldLoadModelConfig() throws Exception { restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName); } + @Test + public void loadChatModel_anthropic_shouldLoadModelConfig() throws Exception { + loadChatModel("anthropic-chat-model.json"); + + final String modelName = "anthropic-chat-1"; + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.anthropic.com/v1'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-anthropic'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/modelName=='claude-3-5-haiku-latest'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName); + } + + @Test + public void loadChatModel_ollama_shouldLoadModelConfig() throws 
Exception { + loadChatModel("ollama-chat-model.json"); + + final String modelName = "ollama-chat-1"; + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='http://localhost:11434'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='llama3.2'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName); + } + + @Test + public void loadChatModel_gemini_shouldLoadModelConfig() throws Exception { + loadChatModel("gemini-chat-model.json"); + + final String modelName = "gemini-chat-1"; + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-gemini'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/modelName=='gemini-2.0-flash'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName); + } + @Test public void loadChatModel_dummyUnsupportedParam_shouldRaiseError() throws Exception { loadChatModel("dummy-chat-model-unsupported.json", "400"); diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java 
b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java index e88c8e549a33..048e073da9f0 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java @@ -165,7 +165,7 @@ public void processAtomicUpdate_shouldTriggerEnrichmentAndFetchTheStoredContent( SolrInputDocument atomicDoc = new SolrInputDocument(); atomicDoc.setField("id", "99"); - atomicDoc.setField("enriched", Map.of("set", "true")); + atomicDoc.setField("enriched", Map.of("set", true)); addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates"); assertU(commit()); From 184f5797264bd6e3ce3cfca71636cf65b5a1b9cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Wed, 1 Apr 2026 11:12:26 +0200 Subject: [PATCH 08/17] [llm-document-enrichment] added documentation for 'Document Enrichment with LLMs' module --- .../pages/document-enrichment-with-llms.adoc | 461 +++++++++++++++++- 1 file changed, 460 insertions(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc index 4207a892e274..0681e99724aa 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc @@ -16,4 +16,463 @@ // specific language governing permissions and limitations // under the License. -This module brings the power of *Language Models* to Solr. +This module brings the power of *Large Language Models* to Solr. 
+ +More specifically, it provides the capability, at indexing time, given a prompt and a set of input fields, of calling an +LLM through https://github.com/langchain4j/langchain4j[LangChain4j] for each document and store the result of the call +in an `outputField`, that can be of multiple types and even multivalued. + +_Without_ this module, the LLM calls must be done _outside_ Solr, before indexing. + +[IMPORTANT] +==== +This module sends your documents off to some hosted service on the internet. +There are cost, privacy, performance, and service availability implications on such a strong dependency that should be +diligently examined before employing this module in a serious way. + +==== + +At the moment a subset of LLM providers supported by LangChain4j is supported by Solr. + +*Disclaimer*: Apache Solr is *in no way* affiliated to any of these corporations or services. + +If you want to add support for additional services or improve the support for the existing ones, feel free to +contribute: + +* https://github.com/apache/solr/blob/main/CONTRIBUTING.md[Contributing to Solr] + +== Module + +This is provided via the `language-models` xref:configuration-guide:solr-modules.adoc[Solr Module] that needs to be +enabled before use. + +== Language Model Configuration + +Language Models is a module and therefore its plugins must be configured in `solrconfig.xml`. + +=== Minimum Requirements + +* Enable the `language-models` module to make the Language Models classes available on Solr's classpath. +See xref:configuration-guide:solr-modules.adoc[Solr Module] for more details. + +* An update processor, similar to the one below, must be declared in `solrconfig.xml`: ++ +[source,xml] +---- + + + string_field + summary + Summarize this content: {string_field} + model-name + + + +---- +[NOTE] +==== +If no component is configured in `solrconfig.xml`, the `ChatModel` store will not be registered and requests to `/schema/chat-model-store` will return an error. 
+==== + +== Document Enrichment Lifecycle + +=== Models + +* A model in this module is a chat model that answers with text given a prompt. +* A model in this Solr module is a reference to an external API that runs the Large Language Model responsible for chat +completion. + +[IMPORTANT] +==== +The Solr chat model specifies the parameters to access the APIs; the LLM doesn't run internally in Solr. + +==== + +A model is described by these parameters: + + +`class`:: ++ +[%autowidth,frame=none] +|=== +s|Required |Default: none +|=== ++ +The model implementation. +Accepted values: + +* `dev.langchain4j.model.ollama.OllamaChatModel` +* `dev.langchain4j.model.mistralai.MistralAiChatModel` +* `dev.langchain4j.model.anthropic.AnthropicChatModel` +* `dev.langchain4j.model.openai.OpenAiChatModel` +* `dev.langchain4j.model.googleai.GoogleAiGeminiChatModel` + +`name`:: ++ +[%autowidth,frame=none] +|=== +s|Required |Default: none +|=== ++ +The identifier of your model; this is used by any component that intends to use the model (e.g., `DocumentEnrichmentUpdateProcessorFactory` update processor). + +`params`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Each model class has potentially different params. +Many are shared but for the full set of parameters of the model you are interested in please refer to the official documentation of the LangChain4j version included in Solr: https://docs.langchain4j.dev/category/language-models[Chat Models in LangChain4j]. + +=== Supported Models +Apache Solr uses https://github.com/langchain4j/langchain4j[LangChain4j] to support document enrichment with LLMs.
+The models currently supported are: + +[tabs#supported-chat-models] +====== +Ollama:: ++ +==== + +[source,json] +---- +{ + "class": "dev.langchain4j.model.ollama.OllamaChatModel", + "name": "", + "params": { + "baseUrl": "http://localhost:11434", + "modelName": "", + "timeout": 300, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== + +MistralAI:: ++ +==== +[source,json] +---- +{ + "class": "dev.langchain4j.model.mistralai.MistralAiChatModel", + "name": "", + "params": { + "baseUrl": "https://api.mistral.ai/v1", + "apiKey": "", + "modelName": "", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== +OpenAI:: ++ +==== +[source,json] +---- +{ + "class": "dev.langchain4j.model.openai.OpenAiChatModel", + "name": "", + "params": { + "baseUrl": "https://api.openai.com/v1", + "apiKey": "", + "modelName": "", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== + +Anthropic:: ++ +==== +[source,json] +---- +{ + "class": "dev.langchain4j.model.anthropic.AnthropicChatModel", + "name": "", + "params": { + "baseUrl": "https://api.anthropic.com/v1/", + "apiKey": "", + "modelName": "", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== + +Gemini:: ++ +==== +[source,json] +---- +{ + "class": "dev.langchain4j.model.googleai.GoogleAiGeminiChatModel", + "name": "", + "params": { + "baseUrl": "https://generativelanguage.googleapis.com/v1beta/", + "apiKey": "", + "modelName": "", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== +====== + +=== Uploading a Model + +To upload the model in a `/path/myModel.json` file, please run: + +[source,bash] +---- +curl -XPUT 'http://localhost:8983/solr/YOUR_COLLECTION/schema/chat-model-store' --data-binary "@/path/myModel.json" -H 'Content-type:application/json' +---- + +To delete the `currentModel` model: + +[source,bash] 
+---- +curl -XDELETE 'http://localhost:8983/solr/YOUR_COLLECTION/schema/chat-model-store/currentModel' +---- + +To view all models: + +[source,text] +http://localhost:8983/solr/YOUR_COLLECTION/schema/chat-model-store + + +.Example: /path/myModel.json +[source,json] +---- +{ + "class": "dev.langchain4j.model.openai.OpenAiChatModel", + "name": "openai-1", + "params": { + "baseUrl": "https://api.openai.com/v1", + "apiKey": "apiKey-openAI", + "modelName": "gpt-5.4-nano", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- + +=== How to Trigger Document Enrichment during Indexing +To create new fields starting from existing ones in your documents at indexing time you need to configure an {solr-javadocs}/core/org/apache/solr/update/processor/UpdateRequestProcessorChain.html[Update Request Processor Chain] that includes at least one `DocumentEnrichmentUpdateProcessor` update request processor in one of the following two ways: + +* Update processor with parameter `prompt` ++ +[source,xml] +---- + + + string_field + summary + Summarize this content: {string_field} + model-name + + + +---- + +* Update processor with parameter `promptFile`: in this case, the file `prompt.txt` must be uploaded to Solr similarly to any other configuration file (e.g., `solrconfig.xml`, `synonyms.txt`, etc.) ++ +[source,xml] +---- + + + string_field + summary + prompt.txt + model-name + + + +---- + +Exactly one of the following parameters is required: `prompt` or `promptFile`. + +Another important feature of this module is that one (or more) `inputField` needs to be injected in the prompt. This is +done by some special tokens, that are the `fieldName` surrounded by curly brackets (e.g., `{fieldName}`). These tokens +are _mandatory_ for this module to work properly. Solr will throw an error if the parameters are not properly defined.
+For example, both the prompt and the content of the file prompt.txt must contain the text '{string_field}', which +will be substituted with the content of the `string_field` field for each document. An example of a valid prompt with +multiple input fields is as follows: + +[source,xml] +---- + + + title + body + summary + Summarize with the following information. Title: {title}. Body: {body}. + chat-model + + + +---- + +The LLM response is mapped to the specified `outputField`. Note that this module only supports a subset of Solr's +available field types, which includes: + +* *String/Text*: `StrField`, `TextField` +* *Date*: `DatePointField` +* *Numeric*: `IntPointField`, `LongPointField`, `FloatPointField`, `DoublePointField` +* *Boolean*: `BoolField` + + +These fields _can_ be multivalued. Solr uses structured output from LangChain4j to deal with LLMs' responses. + + +For more details on how to work with update request processors in Apache Solr, please refer to the dedicated page: +xref:configuration-guide:update-request-processors.adoc[Update Request Processor] + +[IMPORTANT] +==== +This update processor sends your document field content off to some hosted service on the internet. +There are serious performance implications that should be diligently examined before employing this component in production. +It will substantially slow down your indexing pipeline so make sure to stress test your solution before going live. + +==== + +=== Index first and enrich your documents on a second pass +LLM calls are usually quite slow, so, depending on your use case it could be a good idea to index your documents first and +enrich them with new LLM-generated fields later on. + +This can be done in Solr by defining two update request processor chains: one that includes all the processors you need, +excluding the `DocumentEnrichmentUpdateProcessor` (let's call it 'no-enrichment') and one that includes the +`DocumentEnrichmentUpdateProcessor` (let's call it 'enrichment').
+ +[source,xml] +---- + + + ... + + ... + + ... + + + +---- + +[source,xml] +---- + + + ... + + ... + + ... + + + string_field + summary + Summarize this content: {string_field} + chat-model + + + +---- + +You would index your documents first using the 'no-enrichment' chain and, when finished, incrementally repeat the indexing +targeting the 'enrichment' chain. + +[IMPORTANT] +==== +This implies you need to send the documents you want to index to Solr twice and re-run any other update request +processor you need, in the second chain. This has data traffic implications (you transfer your documents over the +network twice) and processing implications (if you have other update request processors in your chain, those must be +repeated the second time as we are literally replacing the indexed documents one by one). +==== + +If your use case is compatible with xref:indexing-guide:partial-document-updates.adoc[Partial Updates], you can do better: + +You still define two chains, but this time the 'enrichment' one only includes the 'DocumentEnrichmentUpdateProcessor' +(and the xref:configuration-guide:update-request-processors.adoc[Mandatory Processors] ) + +[source,xml] +---- + + + ... + + ... + + ... + + + +---- + +[source,xml] +---- + + + + string_field + summary + Summarize this content: {string_field} + chat-model + + + +---- + +[NOTE] +==== +Since partial updates are resolved by `DistributedUpdateProcessorFactory`, be sure to place +`DocumentEnrichmentUpdateProcessorFactory` afterwards so that it sees normal/complete documents. +==== + +Add to your schema a simple field that will be useful to track the enrichment process and use atomic updates: + +[source,xml] +---- + + +---- + +In the first pass just index your documents using your reliable and fast 'no-enrichment' chain.
+ +On the second pass, re-index all your documents using atomic updates and targeting the 'enrichment' chain: + +[source,json] +---- +{ + "id":"mydoc", + "enriched": { + "set": true + } +} +---- + +What will happen is that internally Solr fetches the stored content of the docs to update, all the existing fields are +retrieved and a re-indexing happens, targeting the 'enrichment' chain that will add the LLM-generated fields and set the +boolean `enriched` field to `true`. + +Faceting or querying on the boolean `enriched` field can also give you a quick idea on how many documents have been +enriched with the new generated fields. From 1b7c972756b2e0a76ebb9cf84a8be9a56cbc7eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Wed, 1 Apr 2026 17:29:26 +0200 Subject: [PATCH 09/17] [llm-document-enrichment] cleanup of DocumentEnrichmentUpdateProcessorFactory --- ...umentEnrichmentUpdateProcessorFactory.java | 28 ++++++++----------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java index 659a20897eb4..93d730c852f7 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java @@ -112,14 +112,11 @@ public class DocumentEnrichmentUpdateProcessorFactory extends UpdateRequestProce private static final String MODEL_NAME = "model"; private static final Pattern PLACEHOLDER_PATTERN = Pattern.compile("\\{([^}]+)\\}"); - private ManagedChatModelStore modelStore = null; - private List inputFields; 
private String outputField; - private String prompt; + private String promptText; private String promptFile; private String modelName; - private SolrParams params; @Override public void init(final NamedList args) { @@ -133,7 +130,7 @@ public void init(final NamedList args) { } inputFields = List.copyOf(fieldNames); - params = args.toSolrParams(); + SolrParams params = args.toSolrParams(); RequiredSolrParams required = params.required(); outputField = required.get(OUTPUT_FIELD_PARAM); modelName = required.get(MODEL_NAME); @@ -153,7 +150,7 @@ public void init(final NamedList args) { } if (inlinePrompt != null) { validatePromptPlaceholders(inlinePrompt, inputFields); - this.prompt = inlinePrompt; + this.promptText = inlinePrompt; } this.promptFile = promptFilePath; } @@ -164,25 +161,22 @@ public void inform(SolrCore core) { ManagedChatModelStore.registerManagedChatModelStore(solrResourceLoader, this); if (promptFile != null) { try (InputStream is = solrResourceLoader.openResource(promptFile)) { - prompt = new String(is.readAllBytes(), StandardCharsets.UTF_8).trim(); + promptText = new String(is.readAllBytes(), StandardCharsets.UTF_8).trim(); } catch (IOException e) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Cannot read prompt file: " + promptFile, e); } - validatePromptPlaceholders(prompt, inputFields); + validatePromptPlaceholders(promptText, inputFields); } } @Override public void onManagedResourceInitialized(NamedList args, ManagedResource res) throws SolrException { - if (res instanceof ManagedChatModelStore) { - modelStore = (ManagedChatModelStore) res; - } - if (modelStore != null) { - modelStore.loadStoredModels(); + if (res instanceof ManagedChatModelStore store) { + store.loadStoredModels(); } } @@ -203,8 +197,8 @@ public UpdateRequestProcessor getInstance( ResponseFormat responseFormat = buildResponseFormat(outputFieldSchema); boolean multiValued = outputFieldSchema.multiValued(); - ManagedChatModelStore modelStore = 
ManagedChatModelStore.getManagedModelStore(req.getCore()); - SolrChatModel chatModel = modelStore.getModel(modelName); + ManagedChatModelStore store = ManagedChatModelStore.getManagedModelStore(req.getCore()); + SolrChatModel chatModel = store.getModel(modelName); if (chatModel == null) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, @@ -215,7 +209,7 @@ public UpdateRequestProcessor getInstance( } return new DocumentEnrichmentUpdateProcessor( - inputFields, outputField, prompt, chatModel, multiValued, responseFormat, req, next); + inputFields, outputField, promptText, chatModel, multiValued, responseFormat, req, next); } /** @@ -306,7 +300,7 @@ public String getOutputField() { } public String getPrompt() { - return prompt; + return promptText; } public String getModelName() { From 3d29d164065deefc77257f47e3c3c3c294da02dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Mon, 13 Apr 2026 14:41:35 +0200 Subject: [PATCH 10/17] [llm-document-enrichment] Addressed comments in Anna's review --- .../model/SolrChatModel.java | 27 +- .../store/rest/ManagedChatModelStore.java | 4 +- ...umentEnrichmentUpdateProcessorFactory.java | 41 +- .../rest/ManagedTextToVectorModelStore.java | 2 +- .../dummy-chat-model-malformed-json.json | 7 - .../dummy-chat-model-missing-value-key.json | 7 - .../dummy-chat-model-multivalued-boolean.json | 7 - .../dummy-chat-model-multivalued-date.json | 7 - .../dummy-chat-model-multivalued-double.json | 7 - .../dummy-chat-model-multivalued-float.json | 7 - .../dummy-chat-model-multivalued-int.json | 7 - .../dummy-chat-model-multivalued-long.json | 7 - .../dummy-chat-model-multivalued-scalar.json | 7 - .../dummy-chat-model-multivalued-string.json | 7 - .../dummy-chat-model-single-boolean.json | 7 - .../dummy-chat-model-single-date.json | 7 - .../dummy-chat-model-single-double.json | 7 - .../dummy-chat-model-single-float.json | 7 - .../dummy-chat-model-single-int.json | 7 - .../dummy-chat-model-single-long.json | 7 - 
.../solr/collection1/conf/enumsConfig.xml | 9 + .../conf/schema-language-models.xml | 30 ++ .../conf/solrconfig-document-enrichment.xml | 25 +- .../model/DummyChatModel.java | 5 + .../rest/TestChatModelManagerPersistence.java | 8 +- ...stManagedChatModelStoreInitialization.java | 4 +- ...tEnrichmentUpdateProcessorFactoryTest.java | 173 ++++--- ...DocumentEnrichmentUpdateProcessorTest.java | 473 ++++++++---------- 28 files changed, 422 insertions(+), 491 deletions(-) delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-malformed-json.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-missing-value-key.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-boolean.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-date.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-double.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-float.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-int.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-long.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-scalar.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-string.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-boolean.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-date.json delete mode 100644 
solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-double.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-float.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-int.json delete mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-long.json create mode 100644 solr/modules/language-models/src/test-files/solr/collection1/conf/enumsConfig.xml diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java index afd45d11ca07..542985a16e61 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java @@ -37,18 +37,15 @@ import org.slf4j.LoggerFactory; /** - * This object wraps a {@link ChatModel} to produce the content of new fields from another. - * It's meant to be used as a managed resource with the {@link + * This object wraps a {@link dev.langchain4j.model.chat.ChatModel} to produce the content of a field based on the + * content of other fields specified as input. 
It's meant to be used as a managed resource with the {@link * ManagedChatModelStore} */ public class SolrChatModel implements Accountable { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(SolrChatModel.class); - // timeout is type Duration private static final String TIMEOUT_PARAM = "timeout"; - - // the followings are Integer type private static final String MAX_RETRIES_PARAM = "maxRetries"; private static final String THINKING_BUDGET_TOKENS = "thinkingBudgetTokens"; private static final String RANDOM_SEED = "randomSeed"; @@ -71,15 +68,15 @@ public static SolrChatModel getInstance( * Each model has its own list of parameters we don't know beforehand, but each {@link dev.langchain4j.model.chat.ChatModel} class * has its own builder that uses setters with the same name of the parameter in input. * */ - ChatModel textToTextModel; + ChatModel chatModel; Class modelClass = solrResourceLoader.findClass(className, ChatModel.class); var builder = modelClass.getMethod("builder").invoke(null); if (params != null) { /* * This block of code has the responsibility of instantiate a {@link - * dev.langchain4j.model.chat.ChatModel} using the params provided.classes have - * params of The specific implementation of {@link - * dev.langchain4j.model.chat.ChatModel} is not known beforehand. So we benefit of + * dev.langchain4j.model.chat.ChatModel} using the params provided. Classes have + * params of the specific implementation of {@link + * dev.langchain4j.model.chat.ChatModel}, which is not known beforehand. So we benefit of * the design choice in langchain4j that each subclass implementing {@link * dev.langchain4j.model.chat.ChatModel} uses setters with the same name of the * param. 
@@ -130,8 +127,8 @@ public static SolrChatModel getInstance( } } } - textToTextModel = (ChatModel) builder.getClass().getMethod("build").invoke(builder); - return new SolrChatModel(name, textToTextModel, params); + chatModel = (ChatModel) builder.getClass().getMethod("build").invoke(builder); + return new SolrChatModel(name, chatModel, params); } catch (final Exception e) { throw new ChatModelException("Model loading failed for " + className, e); } @@ -149,21 +146,21 @@ public SolrChatModel( * Sends a structured chat request and returns the parsed value from the {@code {"value": ...}} * JSON object that the model is instructed to produce via {@code responseFormat}. * - * @return the extracted value: a {@link String}, {@link Number}, {@link Boolean}, or {@link + * @return the extracted value: a {@link String}, {@link Number}, {@link Integer}, {@link Boolean}, or {@link * java.util.List} depending on the Solr output field type */ - public Object chat(String text, ResponseFormat responseFormat) { + public Object chat(String prompt, ResponseFormat responseFormat) { ChatRequest chatRequest = ChatRequest.builder() .responseFormat(responseFormat) - .messages(UserMessage.from(text)) + .messages(UserMessage.from(prompt)) .build(); String rawJson = chatModel.chat(chatRequest).aiMessage().text(); Object parsed = Utils.fromJSONString(rawJson); if (!(parsed instanceof Map map) || !map.containsKey("value")) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, - "LLM response is missing the 'value' key: " + rawJson); + "LLM was not able to format the response correctly: " + rawJson); } return map.get("value"); } diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java index f8c6414354d8..306ee19f02d9 100644 --- 
a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java @@ -94,7 +94,7 @@ public static SolrChatModel fromModelMap( } private static LinkedHashMap toModelMap(SolrChatModel model) { - final LinkedHashMap modelMap = new LinkedHashMap<>(5, 1.0f); + final LinkedHashMap modelMap = new LinkedHashMap<>(3, 1.0f); modelMap.put(NAME_KEY, model.getName()); modelMap.put(CLASS_KEY, model.getChatModelClassName()); modelMap.put(PARAMS_KEY, model.getParams()); @@ -114,7 +114,7 @@ public ManagedChatModelStore( @Override protected ManagedResourceStorage createStorage( ManagedResourceStorage.StorageIO storageIO, SolrResourceLoader loader) throws SolrException { - return new ManagedResourceStorage.JsonStorage(storageIO, loader, -1); + return new ManagedResourceStorage.JsonStorage(storageIO, loader); } @Override diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java index 93d730c852f7..0fbda6969bc0 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java @@ -32,7 +32,6 @@ import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.HashSet; -import java.util.LinkedHashSet; import java.util.List; import java.util.Set; import java.util.regex.Matcher; @@ -58,15 +57,17 @@ import org.apache.solr.schema.IndexSchema; import 
org.apache.solr.schema.IntPointField; import org.apache.solr.schema.LongPointField; +import org.apache.solr.schema.NestPathField; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.StrField; import org.apache.solr.schema.TextField; +import org.apache.solr.schema.UUIDField; import org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.solr.update.processor.UpdateRequestProcessorFactory; import org.apache.solr.util.plugin.SolrCoreAware; /** - * Insert in an existing field the output of the model coming from one or more textual field values. + * Generate the content of a field based on other fields specified as input. * *

One or more {@code inputField} parameters specify the Solr fields to use as input. Each field * name must appear as a {@code {fieldName}} placeholder in the prompt. Exactly one of {@code @@ -82,6 +83,20 @@ * </processor> * * + *

Multiple {@code inputField} values can also be declared as an array using {@code arr}: + * + *

+ * <processor class="solr.llm.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessorFactory">
+ *   <arr name="inputField">
+ *     <str>title_field</str>
+ *     <str>body_field</str>
+ *   </arr>
+ *   <str name="outputField">enriched_field</str>
+ *   <str name="prompt">Title: {title_field}. Body: {body_field}.</str>
+ *   <str name="model">ChatModel</str>
+ * </processor>
+ * 
+ * *

Alternatively, the prompt can be loaded from a text file using {@code promptFile}: * *

@@ -243,7 +258,9 @@ static ResponseFormat buildResponseFormat(SchemaField schemaField) {
 
   private static JsonSchemaElement toJsonSchemaElement(FieldType fieldType) {
     // DenseVectorField extends FloatPointField, so it must be rejected before the numeric checks
-    if (fieldType instanceof DenseVectorField) {
+    if (fieldType instanceof DenseVectorField
+        || fieldType instanceof UUIDField
+        || fieldType instanceof NestPathField) {
       throw new SolrException(
           SolrException.ErrorCode.SERVER_ERROR,
           "field type is not supported by Document Enrichment: "
@@ -268,26 +285,26 @@ private static JsonSchemaElement toJsonSchemaElement(FieldType fieldType) {
   }
 
   private static void validatePromptPlaceholders(String prompt, List fieldNames) {
-    Set promptPlaceholders = new LinkedHashSet<>();
+    Set promptPlaceholders = new HashSet<>();
     Matcher m = PLACEHOLDER_PATTERN.matcher(prompt);
     while (m.find()) {
       promptPlaceholders.add(m.group(1));
     }
 
-    Set missingInPrompt = new LinkedHashSet<>(fieldNames);
-    missingInPrompt.removeAll(promptPlaceholders);
-    if (!missingInPrompt.isEmpty()) {
+    Set fieldsWithoutPlaceholder = new HashSet<>(fieldNames);
+    fieldsWithoutPlaceholder.removeAll(promptPlaceholders);
+    if (!fieldsWithoutPlaceholder.isEmpty()) {
       throw new SolrException(
           SolrException.ErrorCode.SERVER_ERROR,
-          "prompt is missing placeholders for inputField(s): " + missingInPrompt);
+          "prompt is missing placeholders for inputField(s): " + fieldsWithoutPlaceholder);
     }
 
-    Set unknownInPrompt = new LinkedHashSet<>(promptPlaceholders);
-    unknownInPrompt.removeAll(new HashSet<>(fieldNames));
-    if (!unknownInPrompt.isEmpty()) {
+    Set placeholdersWithoutField = new HashSet<>(promptPlaceholders);
+    placeholdersWithoutField.removeAll(new HashSet<>(fieldNames));
+    if (!placeholdersWithoutField.isEmpty()) {
       throw new SolrException(
           SolrException.ErrorCode.SERVER_ERROR,
-          "prompt contains placeholders not declared as inputField(s): " + unknownInPrompt);
+          "prompt contains placeholders not declared as inputField(s): " + placeholdersWithoutField);
     }
   }
 
diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/rest/ManagedTextToVectorModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/rest/ManagedTextToVectorModelStore.java
index 70c03ffc47ea..f6bc0f48a773 100644
--- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/rest/ManagedTextToVectorModelStore.java
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/rest/ManagedTextToVectorModelStore.java
@@ -94,7 +94,7 @@ public static SolrTextToVectorModel fromModelMap(
   }
 
   private static LinkedHashMap toModelMap(SolrTextToVectorModel model) {
-    final LinkedHashMap modelMap = new LinkedHashMap<>(5, 1.0f);
+    final LinkedHashMap modelMap = new LinkedHashMap<>(3, 1.0f);
     modelMap.put(NAME_KEY, model.getName());
     modelMap.put(CLASS_KEY, model.getEmbeddingModelClassName());
     modelMap.put(PARAMS_KEY, model.getParams());
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-malformed-json.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-malformed-json.json
deleted file mode 100644
index bdc8394add3b..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-malformed-json.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-chat-1",
-  "params": {
-    "response": "not valid json at all"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-missing-value-key.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-missing-value-key.json
deleted file mode 100644
index 42a52faf650a..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-missing-value-key.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-chat-1",
-  "params": {
-    "response": "{\"result\": \"some value\"}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-boolean.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-boolean.json
deleted file mode 100644
index 7ba22888cb2b..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-boolean.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-boolean-multi",
-  "params": {
-    "response": "{\"value\": [true, false]}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-date.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-date.json
deleted file mode 100644
index f159e3334614..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-date.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-date-multi",
-  "params": {
-    "response": "{\"value\": [\"2024-01-15T00:00:00Z\", \"2025-06-30T00:00:00Z\"]}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-double.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-double.json
deleted file mode 100644
index 8b01495e474e..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-double.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-double-multi",
-  "params": {
-    "response": "{\"value\": [3.14, 2.71]}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-float.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-float.json
deleted file mode 100644
index 0415048c1315..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-float.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-float-multi",
-  "params": {
-    "response": "{\"value\": [1.5, 2.5]}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-int.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-int.json
deleted file mode 100644
index ff15d3f0b584..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-int.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-int-multi",
-  "params": {
-    "response": "{\"value\": [1, 2]}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-long.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-long.json
deleted file mode 100644
index 03c06eb0f5d3..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-long.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-long-multi",
-  "params": {
-    "response": "{\"value\": [10, 20, 30]}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-scalar.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-scalar.json
deleted file mode 100644
index 2deb27259554..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-scalar.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-chat-multivalued-1",
-  "params": {
-    "response": "{\"value\": \"a single string\"}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-string.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-string.json
deleted file mode 100644
index b482ef654211..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-string.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-chat-multivalued-1",
-  "params": {
-    "response": "{\"value\": [\"tag1\", \"tag2\"]}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-boolean.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-boolean.json
deleted file mode 100644
index caca167287a6..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-boolean.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-boolean",
-  "params": {
-    "response": "{\"value\": true}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-date.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-date.json
deleted file mode 100644
index b98eb53cf506..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-date.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-date",
-  "params": {
-    "response": "{\"value\": \"2024-01-15T00:00:00Z\"}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-double.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-double.json
deleted file mode 100644
index 5301937628f7..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-double.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-double",
-  "params": {
-    "response": "{\"value\": 2.5}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-float.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-float.json
deleted file mode 100644
index 8f0c63512a35..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-float.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-float",
-  "params": {
-    "response": "{\"value\": 1.5}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-int.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-int.json
deleted file mode 100644
index 664d846e1260..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-int.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-int",
-  "params": {
-    "response": "{\"value\": 7}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-long.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-long.json
deleted file mode 100644
index 6d58cab102fa..000000000000
--- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-long.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel",
-  "name": "dummy-long",
-  "params": {
-    "response": "{\"value\": 42}"
-  }
-}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/enumsConfig.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/enumsConfig.xml
new file mode 100644
index 000000000000..7292b9204753
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/enumsConfig.xml
@@ -0,0 +1,9 @@
+
+
+
+  
+    Low
+    Medium
+    High
+  
+
diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml
index a7d329e1a88f..073da9a6f2d9 100644
--- a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml
+++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml
@@ -41,6 +41,7 @@
   
   
   
+  
   
   
 
@@ -59,6 +60,20 @@
   
   
   
+  
+
+  
+  
+  
+  
+  
+  
+  
+
+  
+  
+  
+  
 
   
   
@@ -66,6 +81,21 @@
   
   
 
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+    
+      
+    
+  
+
   
     
       
diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml
index f9b82c153d9e..b2164e215e54 100644
--- a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml
+++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml
@@ -59,6 +59,19 @@
   
  
 
+  
+    
+      
+        string_field
+        body_field
+      
+      enriched_field
+      Title: {string_field}. Body: {body_field}.
+      dummy-chat-1
+    
+    
+  
+
  
   
    string_field
@@ -232,4 +245,14 @@
   
  
 
-
\ No newline at end of file
+  
+    
+      tags_field
+      enriched_field
+      Classify these tags: {tags_field}
+      dummy-chat-1
+    
+    
+  
+
+
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java
index 42987b1d69ce..38d9791cd9a6 100644
--- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java
@@ -17,6 +17,7 @@
 package org.apache.solr.languagemodels.documentenrichment.model;
 
 import dev.langchain4j.data.message.AiMessage;
+import dev.langchain4j.data.message.UserMessage;
 import dev.langchain4j.model.chat.ChatModel;
 import dev.langchain4j.model.chat.request.ChatRequest;
 import dev.langchain4j.model.chat.response.ChatResponse;
@@ -31,6 +32,9 @@
  */
 public class DummyChatModel implements ChatModel {
 
+  /** The text of the last prompt received by any instance. Useful for test assertions. */
+  public static String lastReceivedPrompt;
+
   private final String response;
 
   public DummyChatModel(String response) {
@@ -39,6 +43,7 @@ public DummyChatModel(String response) {
 
   @Override
   public ChatResponse chat(ChatRequest chatRequest) {
+    lastReceivedPrompt = ((UserMessage) chatRequest.messages().getFirst()).singleText();
     return ChatResponse.builder().aiMessage(AiMessage.from(response)).build();
   }
 
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java
index 654c98556ab4..0da79ce23a29 100644
--- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java
@@ -44,7 +44,7 @@ public void testModelAreStoredCompact() throws Exception {
 
     final String JSONOnDisk = Files.readString(chatModelStoreFile, StandardCharsets.UTF_8);
     Object objectFromDisk = Utils.fromJSONString(JSONOnDisk);
-    assertEquals(new String(Utils.toJSON(objectFromDisk, -1), UTF_8), JSONOnDisk);
+    assertEquals(new String(Utils.toJSON(objectFromDisk), UTF_8), JSONOnDisk);
   }
 
   @Test
@@ -79,6 +79,10 @@ public void testModelStorePersistence() throws Exception {
         ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'");
     assertJQ(
         ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true");
+    assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5");
 
     // check persistence after restart
     getJetty().stop();
@@ -98,4 +102,4 @@ public void testModelStorePersistence() throws Exception {
     getJetty().start();
     assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/==[]");
   }
-}
\ No newline at end of file
+}
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java
index 0106558401a8..14ee4251efe0 100644
--- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java
@@ -31,7 +31,7 @@ public void cleanUp() throws Exception {
   public void managedChatModelStore_whenUpdateRequestComponentConfigured_shouldBeInitialized()
       throws Exception {
     setupTest(
-        "solrconfig-document-enrichment-update-request-processor-only.xml",
+        "solrconfig-document-enrichment.xml",
         "schema-language-models.xml",
         false,
         false);
@@ -51,4 +51,4 @@ public void managedChatModelStore_whenNoComponents_shouldNotBeInitialized() thro
             + ManagedChatModelStore.REST_END_POINT
             + "'");
   }
-}
\ No newline at end of file
+}
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
index e92bded3c75e..1acf0aba6212 100644
--- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
@@ -16,7 +16,9 @@
  */
 package org.apache.solr.languagemodels.documentenrichment.update.processor;
 
+import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -88,6 +90,20 @@ public void init_multipleInputFields_shouldInitAllFields() {
     assertEquals(List.of("string_field", "body_field"), factory.getInputFields());
   }
 
+  @Test
+  public void init_arrInputField_shouldInitAllFields() {
+    NamedList<Object> args = new NamedList<>();
+    args.add("inputField", new ArrayList<>(List.of("string_field", "body_field"))); // one list value, not one entry per field
+    args.add("outputField", "enriched_field");
+    args.add("prompt", "Title: {string_field}. Body: {body_field}.");
+    args.add("model", "model1");
+
+    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
+    factory.init(args);
+    // The single list value must be expanded into the individual input field names, in order.
+    assertEquals(List.of("string_field", "body_field"), factory.getInputFields());
+  }
+
   @Test
   public void init_noInputField_shouldThrowExceptionWithDetailedMessage() {
     NamedList args = new NamedList<>();
@@ -254,100 +270,89 @@ public void init_promptFileWithMissingPlaceholder_shouldThrowExceptionInInform()
   /* Following tests depend on a real solr schema and depend on BeforeClass-AfterClass methods */
 
   @Test
-  public void init_notExistentOutputField_shouldThrowExceptionWithDetailedMessage() {
-    NamedList args = new NamedList<>();
-    args.add("inputField", "string_field");
-    args.add("outputField", "notExistentOutput");
-    args.add("prompt", "Summarize: {string_field}");
-    args.add("model", "model1");
-
-    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
-    ModifiableSolrParams params = new ModifiableSolrParams();
-    SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {};
-    factory.init(args);
-
-    SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null));
+  public void init_notExistentOutputField_shouldThrowExceptionWithDetailedMessage() throws Exception {
+    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field"),"notExistentOutput", collection1, "model1"));
     assertEquals("undefined field: \"notExistentOutput\"", e.getMessage());
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
   }
 
   @Test
-  public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage() {
-    // vector is a DenseVectorField — not a textual field
-    NamedList args = new NamedList<>();
-    args.add("inputField", "string_field");
-    args.add("outputField", "vector");
-    args.add("prompt", "Summarize: {string_field}");
-    args.add("model", "model1");
-
-    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
-    ModifiableSolrParams params = new ModifiableSolrParams();
-    SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {};
-    factory.init(args);
-
-    SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null));
+  public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage() throws Exception{
+    // vector is a DenseVectorField and it's not supported
+    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field"), "vector", collection1, "model1"));
     assertEquals(
         "field type is not supported by Document Enrichment: DenseVectorField", e.getMessage());
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
   }
 
   @Test
-  public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() {
-    NamedList args = new NamedList<>();
-    args.add("inputField", "notExistentInput");
-    args.add("outputField", "enriched_field");
-    args.add("prompt", "Summarize: {notExistentInput}");
-    args.add("model", "model1");
-
-    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
-    ModifiableSolrParams params = new ModifiableSolrParams();
-    SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {};
-    factory.init(args);
+  public void init_unsupportedOutputFieldType_shouldThrowExceptionWithDetailedMessage() throws Exception {
+    // output_binary is a BinaryField, which is not supported (and is not DenseVectorField)
+    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field"), "output_binary", collection1, "model1"));
+    assertEquals(
+        "field type is not supported by Document Enrichment: BinaryField", e.getMessage());
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
+  }
 
-    SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null));
+  @Test
+  public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() throws Exception {
+    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("notExistentInput"), "enriched_field", collection1, "model1"));
     assertEquals("undefined field: \"notExistentInput\"", e.getMessage());
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
   }
 
   @Test
-  public void init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDetailedMessage() {
-    NamedList args = new NamedList<>();
-    args.add("inputField", "string_field");
-    args.add("inputField", "notExistentInput");
-    args.add("outputField", "enriched_field");
-    args.add("prompt", "Title: {string_field}. Body: {notExistentInput}.");
-    args.add("model", "model1");
-
-    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
-    ModifiableSolrParams params = new ModifiableSolrParams();
-    SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {};
-    factory.init(args);
-
-    SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null));
+  public void init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDetailedMessage() throws Exception {
+    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field", "notExistentInput"), "enriched_field_multi", collection1, "model1"));
     assertEquals("undefined field: \"notExistentInput\"", e.getMessage());
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
   }
 
   @Test
   public void init_multivaluedStringOutputField_shouldNotThrowException() throws Exception {
     UpdateRequestProcessor instance =
-        createUpdateProcessor("string_field", "enriched_field_multi", collection1, "model-mv");
+        createUpdateProcessor(List.of("string_field"), "enriched_field_multi", collection1, "model1");
     assertNotNull(instance);
-    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model-mv");
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
   }
 
+  /* buildResponseFormat tests for field types from the Solr documentation */
+
   @Test
-  public void init_multivaluedStringOutputField_buildResponseFormat_shouldProduceArraySchema() throws Exception {
-    NamedList args = new NamedList<>();
-    ManagedChatModelStore.getManagedModelStore(collection1)
-        .addModel(new SolrChatModel("model-rf", null, null));
-    args.add("inputField", "string_field");
-    args.add("outputField", "enriched_field_multi");
-    args.add("prompt", "Summarize: {string_field}");
-    args.add("model", "model-rf");
+  public void buildResponseFormat_unsupportedFieldTypes_shouldThrowUnsupportedFieldTypeException() {
+    // Schema field name -> field type name expected at the end of the error message.
+    Map<String, String> unsupported = Map.of(
+        "output_collation",   "CollationField",
+        "output_date_range",  "DateRangeField",
+        "output_enum",        "EnumFieldType",
+        "output_lat_lon",     "LatLonPointSpatialField",
+        "output_random_sort", "RandomSortField",
+        "output_rank",        "RankField",
+        "output_uuid",        "UUIDField",
+        "output_nest_path",   "NestPathField"
+    );
+    var latestSchema = collection1.getLatestSchema();
+    for (Map.Entry<String, String> entry : unsupported.entrySet()) {
+      var field = latestSchema.getField(entry.getKey());
+      SolrException thrown = assertThrows(SolrException.class,
+          () -> DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(field));
+      String expectedMessage = "field type is not supported by Document Enrichment: " + entry.getValue();
+      assertEquals(expectedMessage, thrown.getMessage());
+    }
+  }
 
-    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
-    factory.init(args);
-    ModifiableSolrParams params = new ModifiableSolrParams();
-    SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {};
-    assertNotNull(factory.getInstance(req, null, null));
+  @Test
+  public void init_sortableTextOutputField_buildResponseFormat_shouldProduceStringSchema() {
+    // Only the format type and the presence of a JSON schema are asserted here.
+    var sortableTextField = collection1.getLatestSchema().getField("output_sortable_text");
+    var format = DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(sortableTextField);
+    assertNotNull(format);
+    assertEquals(dev.langchain4j.model.chat.request.ResponseFormatType.JSON, format.type());
+    assertNotNull(format.jsonSchema());
+  }
 
+  @Test
+  public void init_multivaluedStringOutputField_buildResponseFormat_shouldProduceArraySchema() {
     // verify the ResponseFormat is constructed correctly for the multivalued field
     var schema = collection1.getLatestSchema();
     var schemaField = schema.getField("enriched_field_multi");
@@ -357,7 +362,6 @@ public void init_multivaluedStringOutputField_buildResponseFormat_shouldProduceA
     assertEquals(
         dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type());
     assertNotNull(responseFormat.jsonSchema());
-    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model-rf");
   }
 
   @Test
@@ -375,40 +379,31 @@ public void init_singleValuedStringOutputField_buildResponseFormat_shouldProduce
   @Test
   public void init_dynamicInputField_shouldNotThrowException() throws Exception{
     UpdateRequestProcessor instance =
-        createUpdateProcessor("text_s", "enriched_field", collection1, "model2");
+        createUpdateProcessor(List.of("text_s"), "enriched_field", collection1, "model1");
     assertNotNull(instance);
-    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model2");
+    restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
   }
 
   @Test
   public void init_multipleDynamicInputFields_shouldNotThrowException() throws Exception{
-    NamedList args = new NamedList<>();
-    ManagedChatModelStore.getManagedModelStore(collection1)
-        .addModel(new SolrChatModel("model1", null, null));
-    args.add("inputField", "text_s");
-    args.add("inputField", "body_field");
-    args.add("outputField", "enriched_field");
-    args.add("prompt", "Title: {text_s}. Body: {body_field}.");
-    args.add("model", "model1");
-
-    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
-    ModifiableSolrParams params = new ModifiableSolrParams();
-    factory.init(args);
-
-    SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {};
-    assertNotNull(factory.getInstance(req, null, null));
+    UpdateRequestProcessor instance =
+        createUpdateProcessor(List.of("text_s", "body_field"), "enriched_field", collection1, "model1");
+    assertNotNull(instance);
     restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
   }
 
   private UpdateRequestProcessor createUpdateProcessor(
-      String inputFieldName, String outputFieldName, SolrCore core, String modelName) {
+      List inputFieldNames, String outputFieldName, SolrCore core, String modelName)
+  throws Exception {
     NamedList args = new NamedList<>();
 
     ManagedChatModelStore.getManagedModelStore(core)
         .addModel(new SolrChatModel(modelName, null, null));
-    args.add("inputField", inputFieldName);
+    for  (String fieldName : inputFieldNames) {
+      args.add("inputField", fieldName);
+    }
     args.add("outputField", outputFieldName);
-    args.add("prompt", "Summarize: {" + inputFieldName + "}");
+    args.add("prompt", "Summarize: {" + String.join("}. {", inputFieldNames) + "}.");
     args.add("model", modelName);
 
     DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java
index 048e073da9f0..d499a90ecbfa 100644
--- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java
@@ -17,13 +17,18 @@
 package org.apache.solr.languagemodels.documentenrichment.update.processor;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
 import org.apache.solr.client.solrj.RemoteSolrException;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.request.SolrQuery;
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.util.Utils;
 import org.apache.solr.languagemodels.TestLanguageModelBase;
+import org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel;
 import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -57,6 +62,15 @@ private void loadTestChatModel(String fileName, String modelId) throws Exception
     loadedModelId = modelId;
   }
 
+  private void loadDummyChatModel(String modelId, String response) throws Exception {
+    Map<String, Object> model = new LinkedHashMap<>(); // insertion-ordered: class, name, params
+    model.put("class", DummyChatModel.class.getName());
+    model.put("name", modelId);
+    model.put("params", Map.of("response", response));
+    assertJPut(ManagedChatModelStore.REST_END_POINT, Utils.toJSONString(model), "/responseHeader/status==0");
+    loadedModelId = modelId; // same bookkeeping as loadTestChatModel
+  }
+
   @Test
   public void processAdd_inputField_shouldEnrichInputField() throws Exception {
     loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
@@ -187,18 +201,26 @@ public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() th
     // modified via atomic update, the enriched content is recomputed and replaces the previous
     // value rather than being appended.
     loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
-    addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment");
+    assertU(adoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "enriched_field", "old content"));
     addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment");
     assertU(commit());
 
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "/response/docs/[0]/enriched_field=='old content'",
+        "/response/docs/[1]/id=='98'",
+        "/response/docs/[1]/enriched_field=='enriched content'");
+
     SolrInputDocument atomicDoc = new SolrInputDocument();
     atomicDoc.setField("id", "99");
     atomicDoc.setField("string_field", Map.of("set", "Vegeta is the saiyan prince from the Dragon Ball series."));
     addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates");
     assertU(commit());
 
-    final SolrQuery query = getEnrichmentQuery("enriched_field");
-
     assertJQ(
         "/query" + query.toQueryString(),
         "/response/numFound==2]",
@@ -211,9 +233,38 @@ public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() th
   // --- multi-field tests ---
 
   @Test
-  public void processAdd_multipleInputFields_allPresent_shouldEnrichDocument() throws Exception {
+  public void processAdd_arrInputField_shouldEnrichDocument() throws Exception {
+    // Verifies that <arr name="inputField"> in solrconfig behaves identically to
+    // multiple <str name="inputField"> entries — both are accepted by removeConfigArgs.
+    final String chain = "documentEnrichmentArrInputField";
+    loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
+    DummyChatModel.lastReceivedPrompt = null;
+
+    SolrInputDocument vegetaDoc =
+        sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", "He is very proud.");
+    SolrInputDocument kakarothDoc =
+        sdoc("id", "98", "string_field", "Kakaroth is a saiyan.", "body_field", "He grew up on Earth.");
+    addWithChain(vegetaDoc, chain);
+    addWithChain(kakarothDoc, chain);
+    assertU(commit());
+
+    // Both documents must come back with the expected enriched_field value.
+    final String enrichedDocsRequest = "/query" + getEnrichmentQuery("enriched_field").toQueryString();
+    assertJQ(
+        enrichedDocsRequest,
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "/response/docs/[0]/enriched_field=='enriched content'",
+        "/response/docs/[1]/id=='98'",
+        "/response/docs/[1]/enriched_field=='enriched content'");
+  }
+
+  @Test
+  public void processAdd_multipleInputFields_allPresent_shouldEnrichDocumentWithBothFields() throws Exception {
     loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
 
+    DummyChatModel.lastReceivedPrompt = null;
+
     addWithChain(
         sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", "He is very proud."),
         "documentEnrichmentMultiField");
@@ -231,6 +282,11 @@ public void processAdd_multipleInputFields_allPresent_shouldEnrichDocument() thr
         "/response/docs/[0]/enriched_field=='enriched content'",
         "/response/docs/[1]/id=='98'",
         "/response/docs/[1]/enriched_field=='enriched content'");
+
+    // Verify both placeholders were substituted
+    assertEquals(
+        "Title: Kakaroth is a saiyan.. Body: He grew up on Earth..",
+        DummyChatModel.lastReceivedPrompt);
   }
 
   @Test
@@ -322,34 +378,42 @@ public void processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichme
   }
 
   @Test
-  public void processAdd_multivaluedStringOutputField_shouldPopulateAllValues() throws Exception {
-    loadTestChatModel("dummy-chat-model-multivalued-string.json", "dummy-chat-multivalued-1");
+  public void processAdd_multivaluedInputField_shouldInterpolateCollectionAndEnrichDocument() throws Exception {
+    // When an input field is multivalued, SolrInputField.getValue() returns the Collection,
+    // whose toString() is used for prompt interpolation (e.g. "[tag1, tag2, tag3]").
+    // Enrichment must proceed — the collection is non-null and non-empty.
+    loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
 
-    addWithChain(
-        sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."),
-        "documentEnrichmentMultivaluedString");
-    addWithChain(
-        sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."),
-        "documentEnrichmentMultivaluedString");
+    DummyChatModel.lastReceivedPrompt = null;
+
+    addWithChain(sdoc("id", "98", "tags_field", "tag1"), "documentEnrichmentMultivaluedInput");
+
+    SolrInputDocument doc = new SolrInputDocument();
+    doc.addField("id", "99");
+    doc.addField("tags_field", "tag1");
+    doc.addField("tags_field", "tag2");
+    doc.addField("tags_field", "tag3");
+    addWithChain(doc, "documentEnrichmentMultivaluedInput");
     assertU(commit());
 
-    final SolrQuery query = getEnrichmentQuery("enriched_field_multi");
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
 
     assertJQ(
         "/query" + query.toQueryString(),
         "/response/numFound==2]",
         "/response/docs/[0]/id=='99'",
-        "/response/docs/[0]/enriched_field_multi/[0]=='tag1'",
-        "/response/docs/[0]/enriched_field_multi/[1]=='tag2'",
+        "/response/docs/[0]/enriched_field=='enriched content'",
         "/response/docs/[1]/id=='98'",
-        "/response/docs/[1]/enriched_field_multi/[0]=='tag1'",
-        "/response/docs/[1]/enriched_field_multi/[1]=='tag2'");
+        "/response/docs/[1]/enriched_field=='enriched content'");
+
+    assertEquals(
+        "Classify these tags: [tag1, tag2, tag3]", DummyChatModel.lastReceivedPrompt);
   }
 
   @Test
   public void processAdd_multivaluedStringOutputField_emptyInput_shouldSkipEnrichment()
       throws Exception {
-    loadTestChatModel("dummy-chat-model-multivalued-string.json", "dummy-chat-multivalued-1");
+    loadDummyChatModel("dummy-chat-multivalued-1", "{\"value\": [\"tag1\", \"tag2\"]}");
 
     addWithChain(sdoc("id", "99", "string_field", ""), "documentEnrichmentMultivaluedString");
     addWithChain(sdoc("id", "98", "string_field", ""), "documentEnrichmentMultivaluedString");
@@ -369,317 +433,216 @@ public void processAdd_multivaluedStringOutputField_emptyInput_shouldSkipEnrichm
   // --- typed single-valued output field tests ---
 
   @Test
-  public void processAdd_singleLongOutputField_shouldPopulateValue() throws Exception {
-    loadTestChatModel("dummy-chat-model-single-long.json", "dummy-long");
-
-    addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleLong");
-    addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleLong");
-    assertU(commit());
-
-    final SolrQuery query = getEnrichmentQuery("output_long");
-
-    assertJQ(
-        "/query" + query.toQueryString(),
-        "/response/numFound==2]",
-        "/response/docs/[0]/id=='99'",
-        "/response/docs/[0]/output_long==42",
-        "/response/docs/[1]/id=='98'",
-        "/response/docs/[1]/output_long==42");
-  }
-
-  @Test
-  public void processAdd_singleIntOutputField_shouldPopulateValue() throws Exception {
-    loadTestChatModel("dummy-chat-model-single-int.json", "dummy-int");
-
-    addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleInt");
-    addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleInt");
-    assertU(commit());
-
-    final SolrQuery query = getEnrichmentQuery("output_int");
-
-    assertJQ(
-        "/query" + query.toQueryString(),
-        "/response/numFound==2]",
-        "/response/docs/[0]/id=='99'",
-        "/response/docs/[0]/output_int==7",
-        "/response/docs/[1]/id=='98'",
-        "/response/docs/[1]/output_int==7");
-  }
-
-  @Test
-  public void processAdd_singleFloatOutputField_shouldPopulateValue() throws Exception {
-    loadTestChatModel("dummy-chat-model-single-float.json", "dummy-float");
-
-    addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleFloat");
-    addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleFloat");
-    assertU(commit());
-
-    final SolrQuery query = getEnrichmentQuery("output_float");
-
-    assertJQ(
-        "/query" + query.toQueryString(),
-        "/response/numFound==2]",
-        "/response/docs/[0]/id=='99'",
-        "/response/docs/[0]/output_float==1.5",
-        "/response/docs/[1]/id=='98'",
-        "/response/docs/[1]/output_float==1.5");
-  }
-
-  @Test
-  public void processAdd_singleDoubleOutputField_shouldPopulateValue() throws Exception {
-    loadTestChatModel("dummy-chat-model-single-double.json", "dummy-double");
-
-    addWithChain(
-        sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleDouble");
-    addWithChain(
-        sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleDouble");
-    assertU(commit());
-
-    final SolrQuery query = getEnrichmentQuery("output_double");
-
-    assertJQ(
-        "/query" + query.toQueryString(),
-        "/response/numFound==2]",
-        "/response/docs/[0]/id=='99'",
-        "/response/docs/[0]/output_double==2.5",
-        "/response/docs/[1]/id=='98'",
-        "/response/docs/[1]/output_double==2.5");
-  }
-
-  @Test
-  public void processAdd_singleBooleanOutputField_shouldPopulateValue() throws Exception {
-    loadTestChatModel("dummy-chat-model-single-boolean.json", "dummy-boolean");
-
-    addWithChain(
-        sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleBoolean");
-    addWithChain(
-        sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleBoolean");
-    assertU(commit());
-
-    final SolrQuery query = getEnrichmentQuery("output_boolean");
-
-    assertJQ(
-        "/query" + query.toQueryString(),
-        "/response/numFound==2]",
-        "/response/docs/[0]/id=='99'",
-        "/response/docs/[0]/output_boolean==true",
-        "/response/docs/[1]/id=='98'",
-        "/response/docs/[1]/output_boolean==true");
-  }
-
-  @Test
-  public void processAdd_singleDateOutputField_shouldPopulateValue() throws Exception {
-    loadTestChatModel("dummy-chat-model-single-date.json", "dummy-date");
-
-    addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleDate");
-    addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleDate");
-    assertU(commit());
-
-    final SolrQuery query = getEnrichmentQuery("output_date");
-
-    assertJQ(
-        "/query" + query.toQueryString(),
-        "/response/numFound==2]",
-        "/response/docs/[0]/id=='99'",
-        "/response/docs/[0]/output_date=='2024-01-15T00:00:00Z'",
-        "/response/docs/[1]/id=='98'",
-        "/response/docs/[1]/output_date=='2024-01-15T00:00:00Z'");
+  public void processAdd_singleTypedOutputField_shouldPopulateValue() throws Exception {
+    record TypedCase(String modelId, String response, String chain, String field, String expectedValue) {}
+    List<TypedCase> cases = List.of(
+        new TypedCase("dummy-long",    "{\"value\": 3000000000}",               "documentEnrichmentSingleLong",    "output_long",    "3000000000"),
+        new TypedCase("dummy-int",     "{\"value\": 7}",                        "documentEnrichmentSingleInt",     "output_int",     "7"),
+        new TypedCase("dummy-float",   "{\"value\": 1.5}",                      "documentEnrichmentSingleFloat",   "output_float",   "1.5"),
+        new TypedCase("dummy-double",  "{\"value\": 1e308}",                    "documentEnrichmentSingleDouble",  "output_double",  "1e308"),
+        new TypedCase("dummy-boolean", "{\"value\": true}",                     "documentEnrichmentSingleBoolean", "output_boolean", "true"),
+        new TypedCase("dummy-date",    "{\"value\": \"2024-01-15T00:00:00Z\"}", "documentEnrichmentSingleDate",    "output_date",    "'2024-01-15T00:00:00Z'")
+    );
+    // Run every case end to end: register its model, index through its chain, verify, unregister.
+    for (TypedCase c : cases) {
+      loadDummyChatModel(c.modelId(), c.response());
+      addWithChain(sdoc("id", "99", "string_field", "some content"), c.chain());
+      addWithChain(sdoc("id", "98", "string_field", "other content"), c.chain());
+      assertU(commit());
+      // expectedValue is spliced into the JSON path test as-is, so string values carry their own quotes.
+      final SolrQuery query = getEnrichmentQuery(c.field());
+      assertJQ(
+          "/query" + query.toQueryString(),
+          "/response/numFound==2]",
+          "/response/docs/[0]/id=='99'",
+          "/response/docs/[0]/" + c.field() + "==" + c.expectedValue(),
+          "/response/docs/[1]/id=='98'",
+          "/response/docs/[1]/" + c.field() + "==" + c.expectedValue());
+      // The model is deleted here, so loadedModelId is cleared to reflect that it is already gone.
+      restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + c.modelId());
+      loadedModelId = null;
+    }
+  }
 
   // --- typed multivalued output field tests ---
 
   @Test
-  public void processAdd_multivaluedLongOutputField_shouldPopulateAllValues() throws Exception {
-    loadTestChatModel("dummy-chat-model-multivalued-long.json", "dummy-long-multi");
-
-    addWithChain(
-        sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedLong");
-    addWithChain(
-        sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedLong");
-    assertU(commit());
-
-    final SolrQuery query = getEnrichmentQuery("output_long_multi");
-
-    assertJQ(
-        "/query" + query.toQueryString(),
-        "/response/numFound==2]",
-        "/response/docs/[0]/id=='99'",
-        "/response/docs/[0]/output_long_multi/[0]==10",
-        "/response/docs/[0]/output_long_multi/[1]==20",
-        "/response/docs/[0]/output_long_multi/[2]==30",
-        "/response/docs/[1]/id=='98'",
-        "/response/docs/[1]/output_long_multi/[0]==10",
-        "/response/docs/[1]/output_long_multi/[1]==20",
-        "/response/docs/[1]/output_long_multi/[2]==30");
+  public void processAdd_multivaluedTypedOutputField_shouldPopulateAllValues() throws Exception {
+    record TypeCaseMulti(String modelId, String response, String chain, String field, List expectedValues) {}
+    List cases = List.of(
+        new TypeCaseMulti("dummy-chat-multivalued-1", "{\"value\": [\"tag1\", \"tag2\"]}",
+            "documentEnrichmentMultivaluedString",  "enriched_field_multi",  List.of("'tag1'", "'tag2'")),
+        new TypeCaseMulti("dummy-long-multi",  "{\"value\": [1000000000, 2000000000, 3000000000]}",
+            "documentEnrichmentMultivaluedLong",    "output_long_multi",     List.of("1000000000", "2000000000", "3000000000")),
+        new TypeCaseMulti("dummy-int-multi",   "{\"value\": [1, 2]}",
+            "documentEnrichmentMultivaluedInt",     "output_int_multi",      List.of("1", "2")),
+        new TypeCaseMulti("dummy-float-multi", "{\"value\": [1.5, 2.5]}",
+            "documentEnrichmentMultivaluedFloat",   "output_float_multi",    List.of("1.5", "2.5")),
+        new TypeCaseMulti("dummy-double-multi","{\"value\": [1e308, 1.1e308]}",
+            "documentEnrichmentMultivaluedDouble",  "output_double_multi",   List.of("1e308", "1.1e308")),
+        new TypeCaseMulti("dummy-boolean-multi", "{\"value\": [true, false]}",
+            "documentEnrichmentMultivaluedBoolean", "output_boolean_multi",  List.of("true", "false")),
+        new TypeCaseMulti("dummy-date-multi",  "{\"value\": [\"2024-01-15T00:00:00Z\", \"2025-06-30T00:00:00Z\"]}",
+            "documentEnrichmentMultivaluedDate",    "output_date_multi",     List.of("'2024-01-15T00:00:00Z'", "'2025-06-30T00:00:00Z'"))
+    );
+
+    for (TypeCaseMulti c : cases) {
+      loadDummyChatModel(c.modelId(), c.response());
+      addWithChain(sdoc("id", "99", "string_field", "some content"), c.chain());
+      addWithChain(sdoc("id", "98", "string_field", "other content"), c.chain());
+      assertU(commit());
+
+      final SolrQuery query = getEnrichmentQuery(c.field());
+      List assertions = new ArrayList<>();
+      assertions.add("/response/numFound==2]");
+      for (int docIdx = 0; docIdx < 2; docIdx++) {
+        String docId = docIdx == 0 ? "'99'" : "'98'";
+        assertions.add("/response/docs/[" + docIdx + "]/id==" + docId);
+        for (int i = 0; i < c.expectedValues().size(); i++) {
+          assertions.add("/response/docs/[" + docIdx + "]/" + c.field() + "/[" + i + "]==" + c.expectedValues().get(i));
+        }
+      }
+      assertJQ("/query" + query.toQueryString(), assertions.toArray(new String[0]));
+
+      restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + c.modelId());
+      loadedModelId = null;
+    }
   }
 
-  @Test
-  public void processAdd_multivaluedIntOutputField_shouldPopulateAllValues() throws Exception {
-    loadTestChatModel("dummy-chat-model-multivalued-int.json", "dummy-int-multi");
-
-    addWithChain(
-        sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedInt");
-    addWithChain(
-        sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedInt");
-    assertU(commit());
-
-    final SolrQuery query = getEnrichmentQuery("output_int_multi");
-
-    assertJQ(
-        "/query" + query.toQueryString(),
-        "/response/numFound==2]",
-        "/response/docs/[0]/id=='99'",
-        "/response/docs/[0]/output_int_multi/[0]==1",
-        "/response/docs/[0]/output_int_multi/[1]==2",
-        "/response/docs/[1]/id=='98'",
-        "/response/docs/[1]/output_int_multi/[0]==1",
-        "/response/docs/[1]/output_int_multi/[1]==2");
-  }
+  // --- LLM response contract violation tests ---
 
   @Test
-  public void processAdd_multivaluedFloatOutputField_shouldPopulateAllValues() throws Exception {
-    loadTestChatModel("dummy-chat-model-multivalued-float.json", "dummy-float-multi");
+  public void processAdd_llmResponseMissingValueKey_shouldLogAndIndexWithNoEnrichedField()
+      throws Exception {
+    // Model returns valid JSON but without the required "value" key
+    loadDummyChatModel("dummy-chat-1", "{\"result\": \"some value\"}");
 
-    addWithChain(
-        sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedFloat");
-    addWithChain(
-        sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedFloat");
+    addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment");
+    addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment");
     assertU(commit());
 
-    final SolrQuery query = getEnrichmentQuery("output_float_multi");
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
 
     assertJQ(
         "/query" + query.toQueryString(),
         "/response/numFound==2]",
         "/response/docs/[0]/id=='99'",
-        "/response/docs/[0]/output_float_multi/[0]==1.5",
-        "/response/docs/[0]/output_float_multi/[1]==2.5",
+        "!/response/docs/[0]/enriched_field==",
         "/response/docs/[1]/id=='98'",
-        "/response/docs/[1]/output_float_multi/[0]==1.5",
-        "/response/docs/[1]/output_float_multi/[1]==2.5");
+        "!/response/docs/[1]/enriched_field==");
   }
 
   @Test
-  public void processAdd_multivaluedDoubleOutputField_shouldPopulateAllValues() throws Exception {
-    loadTestChatModel("dummy-chat-model-multivalued-double.json", "dummy-double-multi");
+  public void processAdd_llmResponseMalformedJson_shouldLogAndIndexWithNoEnrichedField()
+      throws Exception {
+    // Model returns a plain string that cannot be parsed as JSON
+    loadDummyChatModel("dummy-chat-1", "not valid json at all");
 
-    addWithChain(
-        sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedDouble");
-    addWithChain(
-        sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedDouble");
+    addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment");
+    addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment");
     assertU(commit());
 
-    final SolrQuery query = getEnrichmentQuery("output_double_multi");
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
 
     assertJQ(
         "/query" + query.toQueryString(),
         "/response/numFound==2]",
         "/response/docs/[0]/id=='99'",
-        "/response/docs/[0]/output_double_multi/[0]==3.14",
-        "/response/docs/[0]/output_double_multi/[1]==2.71",
+        "!/response/docs/[0]/enriched_field==",
         "/response/docs/[1]/id=='98'",
-        "/response/docs/[1]/output_double_multi/[0]==3.14",
-        "/response/docs/[1]/output_double_multi/[1]==2.71");
+        "!/response/docs/[1]/enriched_field==");
   }
 
-  @Test
-  public void processAdd_multivaluedBooleanOutputField_shouldPopulateAllValues() throws Exception {
-    loadTestChatModel("dummy-chat-model-multivalued-boolean.json", "dummy-boolean-multi");
+  // --- field type incompatibility tests ---
 
-    addWithChain(
-        sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedBoolean");
-    addWithChain(
-        sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedBoolean");
-    assertU(commit());
-
-    final SolrQuery query = getEnrichmentQuery("output_boolean_multi");
-
-    assertJQ(
-        "/query" + query.toQueryString(),
-        "/response/numFound==2]",
-        "/response/docs/[0]/id=='99'",
-        "/response/docs/[0]/output_boolean_multi/[0]==true",
-        "/response/docs/[0]/output_boolean_multi/[1]==false",
-        "/response/docs/[1]/id=='98'",
-        "/response/docs/[1]/output_boolean_multi/[0]==true",
-        "/response/docs/[1]/output_boolean_multi/[1]==false");
+  @Test
+  public void processAdd_dateOutputField_malformedDateString_shouldFailToIndex() throws Exception {
+    // DatePointField requires a full ISO-8601 datetime string (e.g. "2024-01-15T00:00:00Z").
+    // A date-only string like "2024-01-15" (missing time component) cannot be parsed by
+    // DateMathParser and causes the update to fail.
+    // Unlike model exceptions (caught inside processAdd), this error occurs during Solr field
+    // conversion in super.processAdd() and propagates as a RemoteSolrException to the caller.
+    loadDummyChatModel("dummy-date", "{\"value\": \"2024-01-15\"}");
+
+    assertThrows(
+        "date string without time component should fail to index",
+        RemoteSolrException.class,
+        () ->
+            addWithChain(
+                sdoc("id", "99", "string_field", "some content"),
+                "documentEnrichmentSingleDate"));
   }
 
   @Test
-  public void processAdd_multivaluedDateOutputField_shouldPopulateAllValues() throws Exception {
-    loadTestChatModel("dummy-chat-model-multivalued-date.json", "dummy-date-multi");
+  public void processAdd_intOutputField_decimalResponse_shouldTruncateAndIndex() throws Exception {
+    // JSON numbers with a decimal point are parsed as Double (e.g. 3.7 → Double(3.7)).
+    // When stored in a pint field, IntPointField converts via ((Number) value).intValue(),
+    // silently truncating to 3. No exception is thrown — the document is indexed with the
+    // truncated integer value.
+    loadDummyChatModel("dummy-int", "{\"value\": 3.7}");
 
-    addWithChain(
-        sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedDate");
-    addWithChain(
-        sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedDate");
+    addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleInt");
+    addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleInt");
     assertU(commit());
 
-    final SolrQuery query = getEnrichmentQuery("output_date_multi");
-
+    final SolrQuery query = getEnrichmentQuery("output_int");
     assertJQ(
         "/query" + query.toQueryString(),
         "/response/numFound==2]",
         "/response/docs/[0]/id=='99'",
-        "/response/docs/[0]/output_date_multi/[0]=='2024-01-15T00:00:00Z'",
-        "/response/docs/[0]/output_date_multi/[1]=='2025-06-30T00:00:00Z'",
+        "/response/docs/[0]/output_int==3",
         "/response/docs/[1]/id=='98'",
-        "/response/docs/[1]/output_date_multi/[0]=='2024-01-15T00:00:00Z'",
-        "/response/docs/[1]/output_date_multi/[1]=='2025-06-30T00:00:00Z'");
+        "/response/docs/[1]/output_int==3");
   }
 
-  // --- LLM response contract violation tests ---
-
   @Test
-  public void processAdd_llmResponseMissingValueKey_shouldLogAndIndexWithNoEnrichedField()
-      throws Exception {
-    // Model returns valid JSON but without the required "value" key
-    loadTestChatModel("dummy-chat-model-missing-value-key.json", "dummy-chat-1");
+  public void processAdd_doubleOutputField_intResponse_shouldConvertAndIndex() throws Exception {
+    loadDummyChatModel("dummy-double", "{\"value\": 3}");
 
-    addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment");
-    addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment");
+    addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleDouble");
+    addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleDouble");
     assertU(commit());
 
-    final SolrQuery query = getEnrichmentQuery("enriched_field");
-
+    final SolrQuery query = getEnrichmentQuery("output_double");
     assertJQ(
         "/query" + query.toQueryString(),
         "/response/numFound==2]",
         "/response/docs/[0]/id=='99'",
-        "!/response/docs/[0]/enriched_field==",
+        "/response/docs/[0]/output_double==3.0",
         "/response/docs/[1]/id=='98'",
-        "!/response/docs/[1]/enriched_field==");
+        "/response/docs/[1]/output_double==3.0");
   }
 
   @Test
-  public void processAdd_llmResponseMalformedJson_shouldLogAndIndexWithNoEnrichedField()
+  public void processAdd_floatOutputField_doubleResponse_shouldRoundToFloatPrecision()
       throws Exception {
-    // Model returns a plain string that cannot be parsed as JSON
-    loadTestChatModel("dummy-chat-model-malformed-json.json", "dummy-chat-1");
+    // JSON numbers with a decimal point are parsed as Double. When stored in a pfloat field,
+    // FloatPointField converts via ((Number) value).floatValue(), silently rounding to float
+    // precision. No exception is thrown — the document is indexed with the rounded value.
+    loadDummyChatModel("dummy-float", "{\"value\": 3.141592653589793}");
 
-    addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment");
-    addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment");
+    addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleFloat");
+    addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleFloat");
     assertU(commit());
 
-    final SolrQuery query = getEnrichmentQuery("enriched_field");
-
+    final SolrQuery query = getEnrichmentQuery("output_float");
     assertJQ(
         "/query" + query.toQueryString(),
         "/response/numFound==2]",
         "/response/docs/[0]/id=='99'",
-        "!/response/docs/[0]/enriched_field==",
+        "/response/docs/[0]/output_float==3.1415927",
         "/response/docs/[1]/id=='98'",
-        "!/response/docs/[1]/enriched_field==");
+        "/response/docs/[1]/output_float==3.1415927");
   }
 
   // --- multivalued output field / scalar response test ---
 
   @Test
-  public void processAdd_multivaluedOutputField_scalarLlmResponse_shouldStoreSingleValue()
+  public void processAdd_multivaluedOutputField_singleValuedLlmResponse_shouldStoreSingleValue()
       throws Exception {
     // Model returns {"value": "a single string"} for a multivalued output field.
-    // The scalar falls through the List check and is stored as a single-element value.
-    loadTestChatModel("dummy-chat-model-multivalued-scalar.json", "dummy-chat-multivalued-1");
+    // The scalar is stored as single-element multivalued field content (i.e., a list with only one element).
+    loadDummyChatModel("dummy-chat-multivalued-1", "{\"value\": \"a single string\"}");
 
     addWithChain(
         sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."),

From 55ff9ab79c415b9783aa7cb0936d573020e21ff8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= 
Date: Mon, 13 Apr 2026 17:55:38 +0200
Subject: [PATCH 11/17] [llm-document-enrichment] Addressed comments in Anna's
 review

---
 .../store/rest/ManagedChatModelStore.java     |  5 -
 ...umentEnrichmentUpdateProcessorFactory.java | 22 ++---
 ...tEnrichmentUpdateProcessorFactoryTest.java | 98 ++++++++-----------
 ...DocumentEnrichmentUpdateProcessorTest.java | 36 +++----
 4 files changed, 69 insertions(+), 92 deletions(-)

diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java
index 306ee19f02d9..cd83dfba1cee 100644
--- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java
@@ -111,11 +111,6 @@ public ManagedChatModelStore(
     store = new ChatModelStore();
   }
 
-  @Override
-  protected ManagedResourceStorage createStorage(
-      ManagedResourceStorage.StorageIO storageIO, SolrResourceLoader loader) throws SolrException {
-    return new ManagedResourceStorage.JsonStorage(storageIO, loader);
-  }
 
   @Override
   protected void onManagedDataLoadedFromStorage(NamedList managedInitArgs, Object managedData)
diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java
index 0fbda6969bc0..dddcac4ae3b6 100644
--- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java
@@ -286,25 +286,25 @@ private static JsonSchemaElement toJsonSchemaElement(FieldType fieldType) {
 
   private static void validatePromptPlaceholders(String prompt, List fieldNames) {
     Set promptPlaceholders = new HashSet<>();
-    Matcher m = PLACEHOLDER_PATTERN.matcher(prompt);
-    while (m.find()) {
-      promptPlaceholders.add(m.group(1));
+    Matcher matcher = PLACEHOLDER_PATTERN.matcher(prompt);
+    while (matcher.find()) {
+      promptPlaceholders.add(matcher.group(1));
     }
 
-    Set fieldsWithoutPlaceholder = new HashSet<>(fieldNames);
-    fieldsWithoutPlaceholder.removeAll(promptPlaceholders);
-    if (!fieldsWithoutPlaceholder.isEmpty()) {
+    Set fieldsWithoutPlaceholderInPrompt = new HashSet<>(fieldNames);
+    fieldsWithoutPlaceholderInPrompt.removeAll(promptPlaceholders);
+    if (!fieldsWithoutPlaceholderInPrompt.isEmpty()) {
       throw new SolrException(
           SolrException.ErrorCode.SERVER_ERROR,
-          "prompt is missing placeholders for inputField(s): " + fieldsWithoutPlaceholder);
+          "prompt is missing placeholders for inputField(s): " + fieldsWithoutPlaceholderInPrompt);
     }
 
-    Set placeholdersWithoutField = new HashSet<>(promptPlaceholders);
-    placeholdersWithoutField.removeAll(new HashSet<>(fieldNames));
-    if (!placeholdersWithoutField.isEmpty()) {
+    Set placeholdersInPromptWithoutField = new HashSet<>(promptPlaceholders);
+    placeholdersInPromptWithoutField.removeAll(new HashSet<>(fieldNames));
+    if (!placeholdersInPromptWithoutField.isEmpty()) {
       throw new SolrException(
           SolrException.ErrorCode.SERVER_ERROR,
-          "prompt contains placeholders not declared as inputField(s): " + placeholdersWithoutField);
+          "prompt contains placeholders not declared as inputField(s): " + placeholdersInPromptWithoutField);
     }
   }
 
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
index 1acf0aba6212..33baef699f73 100644
--- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
@@ -19,6 +19,7 @@
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -60,14 +61,7 @@ public void after() {
 
   @Test
   public void init_fullArgs_shouldInitAllParams() {
-    NamedList args = new NamedList<>();
-    args.add("inputField", "string_field");
-    args.add("outputField", "enriched_field");
-    args.add("prompt", "Summarize: {string_field}");
-    args.add("model", "model1");
-
-    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
-    factory.init(args);
+    DocumentEnrichmentUpdateProcessorFactory factory = initializeUpdateProcessorFactory(List.of("string_field"), "enriched_field", null, "model1");
 
     assertEquals(List.of("string_field"), factory.getInputFields());
     assertEquals("enriched_field", factory.getOutputField());
@@ -77,15 +71,7 @@ public void init_fullArgs_shouldInitAllParams() {
 
   @Test
   public void init_multipleInputFields_shouldInitAllFields() {
-    NamedList args = new NamedList<>();
-    args.add("inputField", "string_field");
-    args.add("inputField", "body_field");
-    args.add("outputField", "enriched_field");
-    args.add("prompt", "Title: {string_field}. Body: {body_field}.");
-    args.add("model", "model1");
-
-    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
-    factory.init(args);
+    DocumentEnrichmentUpdateProcessorFactory factory = initializeUpdateProcessorFactory(List.of("string_field", "body_field"), "enriched_field", null, "model1");
 
     assertEquals(List.of("string_field", "body_field"), factory.getInputFields());
   }
@@ -104,6 +90,7 @@ public void init_arrInputField_shouldInitAllFields() {
     assertEquals(List.of("string_field", "body_field"), factory.getInputFields());
   }
 
+  // When an exception is expected at init time, the helper function cannot be used
   @Test
   public void init_noInputField_shouldThrowExceptionWithDetailedMessage() {
     NamedList args = new NamedList<>();
@@ -218,14 +205,7 @@ public void init_nullModel_shouldThrowExceptionWithDetailedMessage() {
 
   @Test
   public void init_promptFile_shouldLoadPromptFromFile() {
-    NamedList args = new NamedList<>();
-    args.add("inputField", "string_field");
-    args.add("outputField", "enriched_field");
-    args.add("promptFile", "prompt.txt");
-    args.add("model", "model1");
-
-    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
-    factory.init(args);
+    DocumentEnrichmentUpdateProcessorFactory factory = initializeUpdateProcessorFactory(List.of("string_field"), "enriched_field", "prompt.txt", "model1");
     factory.inform(collection1);
 
     assertEquals("prompt.txt", factory.getPromptFile());
@@ -235,15 +215,7 @@ public void init_promptFile_shouldLoadPromptFromFile() {
 
   @Test
   public void init_promptFileMultiField_shouldLoadAndValidateBothPlaceholders() {
-    NamedList args = new NamedList<>();
-    args.add("inputField", "string_field");
-    args.add("inputField", "body_field");
-    args.add("outputField", "enriched_field");
-    args.add("promptFile", "prompt-multi-field.txt");
-    args.add("model", "model1");
-
-    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
-    factory.init(args);
+    DocumentEnrichmentUpdateProcessorFactory factory = initializeUpdateProcessorFactory(List.of("string_field", "body_field"), "enriched_field", "prompt-multi-field.txt", "model1");
     factory.inform(collection1);
 
     assertNotNull(factory.getPrompt());
@@ -253,14 +225,7 @@ public void init_promptFileMultiField_shouldLoadAndValidateBothPlaceholders() {
 
   @Test
   public void init_promptFileWithMissingPlaceholder_shouldThrowExceptionInInform() {
-    NamedList args = new NamedList<>();
-    args.add("inputField", "string_field");
-    args.add("outputField", "enriched_field");
-    args.add("promptFile", "prompt-no-placeholder.txt");
-    args.add("model", "model1");
-
-    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
-    factory.init(args);
+    DocumentEnrichmentUpdateProcessorFactory factory = initializeUpdateProcessorFactory(List.of("string_field"), "enriched_field", "prompt-no-placeholder.txt", "model1");
 
     SolrException e = assertThrows(SolrException.class, () -> factory.inform(collection1));
     assertEquals(
@@ -271,7 +236,7 @@ public void init_promptFileWithMissingPlaceholder_shouldThrowExceptionInInform()
 
   @Test
   public void init_notExistentOutputField_shouldThrowExceptionWithDetailedMessage() throws Exception {
-    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field"),"notExistentOutput", collection1, "model1"));
+    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field"),"notExistentOutput", null, collection1, "model1"));
     assertEquals("undefined field: \"notExistentOutput\"", e.getMessage());
     restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
   }
@@ -279,7 +244,7 @@ public void init_notExistentOutputField_shouldThrowExceptionWithDetailedMessage(
   @Test
   public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage() throws Exception{
     // vector is a DenseVectorField and it's not supported
-    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field"), "vector", collection1, "model1"));
+    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field"), "vector", null, collection1, "model1"));
     assertEquals(
         "field type is not supported by Document Enrichment: DenseVectorField", e.getMessage());
     restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
@@ -288,7 +253,7 @@ public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage()
   @Test
   public void init_unsupportedOutputFieldType_shouldThrowExceptionWithDetailedMessage() throws Exception {
     // output_binary is a BinaryField, which is not supported (and is not DenseVectorField)
-    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field"), "output_binary", collection1, "model1"));
+    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field"), "output_binary", null, collection1, "model1"));
     assertEquals(
         "field type is not supported by Document Enrichment: BinaryField", e.getMessage());
     restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
@@ -296,14 +261,14 @@ public void init_unsupportedOutputFieldType_shouldThrowExceptionWithDetailedMess
 
   @Test
   public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() throws Exception {
-    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("notExistentInput"), "enriched_field", collection1, "model1"));
+    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("notExistentInput"), "enriched_field", null, collection1, "model1"));
     assertEquals("undefined field: \"notExistentInput\"", e.getMessage());
     restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
   }
 
   @Test
   public void init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDetailedMessage() throws Exception {
-    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field", "notExistentInput"), "enriched_field_multi", collection1, "model1"));
+    SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field", "notExistentInput"), "enriched_field_multi", null, collection1, "model1"));
     assertEquals("undefined field: \"notExistentInput\"", e.getMessage());
     restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
   }
@@ -311,7 +276,7 @@ public void init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDeta
   @Test
   public void init_multivaluedStringOutputField_shouldNotThrowException() throws Exception {
     UpdateRequestProcessor instance =
-        createUpdateProcessor(List.of("string_field"), "enriched_field_multi", collection1, "model1");
+        createUpdateProcessor(List.of("string_field"), "enriched_field_multi", null, collection1, "model1");
     assertNotNull(instance);
     restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
   }
@@ -379,7 +344,7 @@ public void init_singleValuedStringOutputField_buildResponseFormat_shouldProduce
   @Test
   public void init_dynamicInputField_shouldNotThrowException() throws Exception{
     UpdateRequestProcessor instance =
-        createUpdateProcessor(List.of("text_s"), "enriched_field", collection1, "model1");
+        createUpdateProcessor(List.of("text_s"), "enriched_field", null, collection1, "model1");
     assertNotNull(instance);
     restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
   }
@@ -387,31 +352,48 @@ public void init_dynamicInputField_shouldNotThrowException() throws Exception{
   @Test
   public void init_multipleDynamicInputFields_shouldNotThrowException() throws Exception{
     UpdateRequestProcessor instance =
-        createUpdateProcessor(List.of("text_s", "body_field"), "enriched_field", collection1, "model1");
+        createUpdateProcessor(List.of("text_s", "body_field"), "enriched_field", null, collection1, "model1");
     assertNotNull(instance);
     restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1");
   }
 
   private UpdateRequestProcessor createUpdateProcessor(
-      List inputFieldNames, String outputFieldName, SolrCore core, String modelName)
+      List inputFieldNames,
+      String outputFieldName,
+      String prompt,
+      SolrCore core,
+      String modelName)
   throws Exception {
-    NamedList args = new NamedList<>();
 
     ManagedChatModelStore.getManagedModelStore(core)
         .addModel(new SolrChatModel(modelName, null, null));
+
+    DocumentEnrichmentUpdateProcessorFactory factory =
+        initializeUpdateProcessorFactory(inputFieldNames, outputFieldName, prompt, modelName);
+
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    SolrQueryRequestBase req = new SolrQueryRequestBase(core, params) {};
+
+    return factory.getInstance(req, null, null);
+  }
+
+  private DocumentEnrichmentUpdateProcessorFactory initializeUpdateProcessorFactory(
+      List inputFieldNames, String outputFieldName, String prompt, String modelName) {
+    NamedList args = new NamedList<>();
+
     for  (String fieldName : inputFieldNames) {
       args.add("inputField", fieldName);
     }
     args.add("outputField", outputFieldName);
-    args.add("prompt", "Summarize: {" + String.join("}. {", inputFieldNames) + "}.");
+    // `prompt` may be an inline prompt, a prompt file name (must end in ".txt"), or null for a
+    // default inline prompt. File names go under "promptFile", as the pre-refactor tests did.
+    boolean isPromptFile = prompt != null && prompt.endsWith(".txt");
+    String promptValue = Objects.requireNonNullElseGet(prompt, () -> "Summarize: {" + String.join("}. {", inputFieldNames) + "}.");
+    args.add(isPromptFile ? "promptFile" : "prompt", promptValue);
     args.add("model", modelName);
 
     DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
-    ModifiableSolrParams params = new ModifiableSolrParams();
     factory.init(args);
-
-    SolrQueryRequestBase req = new SolrQueryRequestBase(core, params) {};
-
-    return factory.getInstance(req, null, null);
+    return factory;
   }
 }
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java
index d499a90ecbfa..561aafebd309 100644
--- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java
@@ -435,7 +435,7 @@ public void processAdd_multivaluedStringOutputField_emptyInput_shouldSkipEnrichm
   @Test
   public void processAdd_singleTypedOutputField_shouldPopulateValue() throws Exception {
     record TypedCase(String modelId, String response, String chain, String field, String expectedValue) {}
-    List cases = List.of(
+    List typedCases = List.of(
         new TypedCase("dummy-long",    "{\"value\": 3000000000}",               "documentEnrichmentSingleLong",    "output_long",    "3000000000"),
         new TypedCase("dummy-int",     "{\"value\": 7}",                        "documentEnrichmentSingleInt",     "output_int",     "7"),
         new TypedCase("dummy-float",   "{\"value\": 1.5}",                      "documentEnrichmentSingleFloat",   "output_float",   "1.5"),
@@ -444,22 +444,22 @@ record TypedCase(String modelId, String response, String chain, String field, St
         new TypedCase("dummy-date",    "{\"value\": \"2024-01-15T00:00:00Z\"}", "documentEnrichmentSingleDate",    "output_date",    "'2024-01-15T00:00:00Z'")
     );
 
-    for (TypedCase c : cases) {
-      loadDummyChatModel(c.modelId(), c.response());
-      addWithChain(sdoc("id", "99", "string_field", "some content"), c.chain());
-      addWithChain(sdoc("id", "98", "string_field", "other content"), c.chain());
+    for (TypedCase typedCase : typedCases) {
+      loadDummyChatModel(typedCase.modelId(), typedCase.response());
+      addWithChain(sdoc("id", "99", "string_field", "some content"), typedCase.chain());
+      addWithChain(sdoc("id", "98", "string_field", "other content"), typedCase.chain());
       assertU(commit());
 
-      final SolrQuery query = getEnrichmentQuery(c.field());
+      final SolrQuery query = getEnrichmentQuery(typedCase.field());
       assertJQ(
           "/query" + query.toQueryString(),
           "/response/numFound==2]",
           "/response/docs/[0]/id=='99'",
-          "/response/docs/[0]/" + c.field() + "==" + c.expectedValue(),
+          "/response/docs/[0]/" + typedCase.field() + "==" + typedCase.expectedValue(),
           "/response/docs/[1]/id=='98'",
-          "/response/docs/[1]/" + c.field() + "==" + c.expectedValue());
+          "/response/docs/[1]/" + typedCase.field() + "==" + typedCase.expectedValue());
 
-      restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + c.modelId());
+      restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + typedCase.modelId());
       loadedModelId = null;
     }
   }
@@ -469,7 +469,7 @@ record TypedCase(String modelId, String response, String chain, String field, St
   @Test
   public void processAdd_multivaluedTypedOutputField_shouldPopulateAllValues() throws Exception {
     record TypeCaseMulti(String modelId, String response, String chain, String field, List expectedValues) {}
-    List cases = List.of(
+    List typedCaseMultis = List.of(
         new TypeCaseMulti("dummy-chat-multivalued-1", "{\"value\": [\"tag1\", \"tag2\"]}",
             "documentEnrichmentMultivaluedString",  "enriched_field_multi",  List.of("'tag1'", "'tag2'")),
         new TypeCaseMulti("dummy-long-multi",  "{\"value\": [1000000000, 2000000000, 3000000000]}",
@@ -486,25 +486,25 @@ record TypeCaseMulti(String modelId, String response, String chain, String field
             "documentEnrichmentMultivaluedDate",    "output_date_multi",     List.of("'2024-01-15T00:00:00Z'", "'2025-06-30T00:00:00Z'"))
     );
 
-    for (TypeCaseMulti c : cases) {
-      loadDummyChatModel(c.modelId(), c.response());
-      addWithChain(sdoc("id", "99", "string_field", "some content"), c.chain());
-      addWithChain(sdoc("id", "98", "string_field", "other content"), c.chain());
+    for (TypeCaseMulti typedCase : typedCaseMultis) {
+      loadDummyChatModel(typedCase.modelId(), typedCase.response());
+      addWithChain(sdoc("id", "99", "string_field", "some content"), typedCase.chain());
+      addWithChain(sdoc("id", "98", "string_field", "other content"), typedCase.chain());
       assertU(commit());
 
-      final SolrQuery query = getEnrichmentQuery(c.field());
+      final SolrQuery query = getEnrichmentQuery(typedCase.field());
       List assertions = new ArrayList<>();
       assertions.add("/response/numFound==2]");
       for (int docIdx = 0; docIdx < 2; docIdx++) {
         String docId = docIdx == 0 ? "'99'" : "'98'";
         assertions.add("/response/docs/[" + docIdx + "]/id==" + docId);
-        for (int i = 0; i < c.expectedValues().size(); i++) {
-          assertions.add("/response/docs/[" + docIdx + "]/" + c.field() + "/[" + i + "]==" + c.expectedValues().get(i));
+        for (int i = 0; i < typedCase.expectedValues().size(); i++) {
+          assertions.add("/response/docs/[" + docIdx + "]/" + typedCase.field() + "/[" + i + "]==" + typedCase.expectedValues().get(i));
         }
       }
       assertJQ("/query" + query.toQueryString(), assertions.toArray(new String[0]));
 
-      restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + c.modelId());
+      restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + typedCase.modelId());
       loadedModelId = null;
     }
   }

From ed400088adaeb4088b268a0f02e852e6b20d8ec7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= 
Date: Tue, 14 Apr 2026 12:45:17 +0200
Subject: [PATCH 12/17] [llm-document-enrichment] Fixed broken tests and
 updated documentation

---
 ...tEnrichmentUpdateProcessorFactoryTest.java | 13 +++--
 .../pages/document-enrichment-with-llms.adoc  | 52 +++++++++++++------
 2 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
index 33baef699f73..d2f52bb3f554 100644
--- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
@@ -65,7 +65,7 @@ public void init_fullArgs_shouldInitAllParams() {
 
     assertEquals(List.of("string_field"), factory.getInputFields());
     assertEquals("enriched_field", factory.getOutputField());
-    assertEquals("Summarize: {string_field}", factory.getPrompt());
+    assertEquals("Summarize: {string_field}.", factory.getPrompt());
     assertEquals("model1", factory.getModelName());
   }
 
@@ -95,7 +95,7 @@ public void init_arrInputField_shouldInitAllFields() {
   public void init_noInputField_shouldThrowExceptionWithDetailedMessage() {
     NamedList args = new NamedList<>();
     args.add("outputField", "enriched_field");
-    args.add("prompt", "Summarize: {string_field}");
+    args.add("prompt", "Summarize: {string_field}.");
     args.add("model", "model1");
 
     DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
@@ -386,9 +386,12 @@ private DocumentEnrichmentUpdateProcessorFactory initializeUpdateProcessorFactor
     }
     args.add("outputField", outputFieldName);
 
-    args.add(
-        "prompt",
-        Objects.requireNonNullElseGet(prompt, () -> "Summarize: {" + String.join("}. {", inputFieldNames) + "}."));
+    if (prompt != null) {
+      args.add("promptFile", prompt);
+    }
+    else {
+      args.add("prompt", "Summarize: {" + String.join("}. {", inputFieldNames) + "}.");
+    }
 
     args.add("model", modelName);
 
diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc
index 0681e99724aa..fd8ef8a40f7d 100644
--- a/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc
+++ b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc
@@ -71,20 +71,21 @@ See xref:configuration-guide:solr-modules.adoc[Solr Module] for more details.
 ----
 [NOTE]
 ====
-If no component is configured in `solrconfig.xml`, the `ChatModel` store will not be registered and requests to `/schema/chat-model-store` will return an error.
+If no component is configured in `solrconfig.xml`, the `ChatModel` store will not be registered and requests to
+`/schema/chat-model-store` will return an error.
 ====
 
-== Document Enrichment Lifecycle
+== Chat Model Configuration
 
 === Models
 
 * A model in this module is a chat model, that answers with text given a prompt.
-* A model in this Solr module is a reference to an external API that runs the Large Language Model responsible chat
+* A model in this Solr module is a reference to an external API that runs the Large Language Model responsible for chat
 completion.
 
 [IMPORTANT]
 ====
-the Solr chat model specifies the parameters to access the APIs, the LLM doesn't run internally in Solr
+The Solr chat model specifies the parameters to access the APIs, the LLM doesn't run internally in Solr
 
 ====
 
@@ -127,7 +128,7 @@ Each model class has potentially different params.
 Many are shared but for the full set of parameters of the model you are interested in please refer to the official documentation of the LangChain4j version included in Solr: https://docs.langchain4j.dev/category/language-models[Chat Models in LangChain4j].
 
 === Supported Models
-Apache Solr uses https://github.com/langchain4j/langchain4j[LangChain4j] to support document enrichement with LLMs.
+Apache Solr uses https://github.com/langchain4j/langchain4j[LangChain4j] to support document enrichment with LLMs.
 The models currently supported are:
 
 [tabs#supported-chat-models]
@@ -259,7 +260,7 @@ To view all models:
 http://localhost:8983/solr/YOUR_COLLECTION/schema/chat-model-store
 
 
-.Example: /path/myModel.json
+.Example: /path/myOpenAIModel.json
 [source,json]
 ----
 {
@@ -277,7 +278,7 @@ http://localhost:8983/solr/YOUR_COLLECTION/schema/chat-model-store
 }
 ----
 
-=== How to Trigger Document Enrichment during Indexing
+== How to Trigger Document Enrichment during Indexing
 To create new fields starting from existent ones in your documents at indexing time you need to configure an {solr-javadocs}/core/org/apache/solr/update/processor/UpdateRequestProcessorChain.html[Update Request Processor Chain] that includes at least one `DocumentEnrichmentUpdateProcessor` update request processor in one of the 2 following way:
 
 * Update processor with parameter `prompt`
@@ -313,8 +314,9 @@ To create new fields starting from existent ones in your documents at indexing t
 Exactly one of the following parameters is required: `prompt` or `promptFile`.
 
 Another important feature of this module is that one (or more) `inputField` needs to be injected in the prompt. This is
-done by some special tokens, that are the `fieldName` surrounded by curly brackets (e.g., `{fieldName}`). These tokens
-are _mandatory_ for this module to work properly. Solr will throw an error if the parameters are not properly defined.
+done by some special tokens, that are the `fieldName` surrounded by curly brackets (e.g., `{string_field}`, in the
+example above). These tokens are _mandatory_ for this module to work properly. Solr will throw an error if the
+parameters are not properly defined.
 For example, both the prompt and the content of the file prompt.txt, must contain the text '{string_field}', which
 will be substituted with the content of the `string_field` field for each document. An example of a valid prompt with
 multiple input fields is as follows:
@@ -333,16 +335,26 @@ multiple input fields is as follows:
  
 ----
 
+Another way of using more than one `inputField` is by using the following notation, instead of more than one parameter
+with the same name:
+[source,xml]
+----
+
+    title
+    body
+
+----
+
 The LLM response is mapped to the specified `outputField`. Note that this module only supports a subset of Solr's
 available field types, which includes:
 
-* *String/Text*: `StrField`, `TextField`
-* *Date*: `DatePointField`
-* *Numeric*: `IntPointField`, `LongPointField, `FloatPointField`, `DoublePointField`
+* *String/Text*: `StrField`, `TextField`, `SortableTextField`
+* *Date*: `DatePointField` (the LLM must return an ISO-8601 date string; it might be useful to tune your prompt accordingly, to avoid indexing errors)
+* *Numeric*: `IntPointField`, `LongPointField`, `FloatPointField`, `DoublePointField`
 * *Boolean*: `BoolField`
 
 
-This fields _can_ be multivalued. Solr uses structured output form LangChain4j to deal with LLMs' responses.
+These fields _can_ be multivalued. Solr uses structured output from LangChain4j to deal with LLMs' responses.
 
 
 For more details on how to work with update request processors in Apache Solr, please refer to the dedicated page:
@@ -356,12 +368,22 @@ It will slow down substantially your indexing pipeline so make sure to stress te
 
 ====
 
+[NOTE]
+====
+If any `inputField` value is absent or empty for a given document, enrichment is silently skipped for that document:
+the `outputField` is not added and the document is indexed as-is.
+
+If the LLM call fails at runtime (e.g., network error, model timeout), the exception is caught and logged but is
+*non-fatal*: the document is still indexed without the `outputField`.
+Monitor your indexing logs to detect documents that were not enriched as expected.
+====
+
 === Index first and enrich your documents on a second pass
 LLM calls are usually quite slow, so, depending on your use case it could be a good idea to index first your documents
 enrich them with new LLM-generated fields later on.
 
 This can be done in Solr defining two update request processors chains: one that includes all the processors you need,
-excluded the `DocumentEnrichmentUpdateProcessor` (let's call it 'no-enrichment') and one that includes the
+excluding the `DocumentEnrichmentUpdateProcessor` (let's call it 'no-enrichment') and one that includes the
 `DocumentEnrichmentUpdateProcessor` (let's call it 'enrichment').
 
 [source,xml]
@@ -412,7 +434,7 @@ repeated the second time as we are literally replacing the indexed documents one
 If your use case is compatible with xref:indexing-guide:partial-document-updates.adoc[Partial Updates], you can do better:
 
 You still define two chains, but this time the 'enrichment' one only includes the 'DocumentEnrichmentUpdateProcessor'
-(and the xref:configuration-guide:update-request-processors.adoc[Mandatory Processors] )
+(and the xref:configuration-guide:update-request-processors.adoc[Mandatory Processors])
 
 [source,xml]
 ----

From ba27be1678c5ac6d83ad248d4e87adfb2fb645ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= 
Date: Wed, 15 Apr 2026 15:33:25 +0200
Subject: [PATCH 13/17] [llm-document-enrichment] Added tests, Exception for
 multiple output fields and updated documentation

---
 .../store/rest/ManagedChatModelStore.java     |   2 +-
 ...umentEnrichmentUpdateProcessorFactory.java |  13 +-
 .../modelChatExamples/not-a-chat-model.json   |   7 +
 .../languagemodels/TestLanguageModelBase.java |  12 ++
 .../store/rest/TestChatModelManager.java      |  13 +-
 ...tEnrichmentUpdateProcessorFactoryTest.java |  16 +-
 .../pages/update-request-processors.adoc      |   4 +
 .../pages/document-enrichment-with-llms.adoc  | 153 +++++++++++-------
 8 files changed, 161 insertions(+), 59 deletions(-)
 create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/not-a-chat-model.json

diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java
index cd83dfba1cee..a4e15d206799 100644
--- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java
@@ -135,7 +135,7 @@ private void addModelFromMap(Map modelMap) {
     try {
       addModel(fromModelMap(solrResourceLoader, modelMap));
     } catch (final ChatModelException e) {
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e);
     }
   }
 
diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java
index dddcac4ae3b6..7daafe4f1561 100644
--- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java
@@ -145,9 +145,20 @@ public void init(final NamedList args) {
     }
     inputFields = List.copyOf(fieldNames);
 
+    Collection outputFields = args.removeConfigArgs(OUTPUT_FIELD_PARAM);
+    if (outputFields.isEmpty()) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR, "Exactly one 'outputField' must be provided");
+    }
+    if (outputFields.size() > 1) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          "Only one 'outputField' can be provided, but found: " + outputFields);
+    }
+    outputField = outputFields.iterator().next();
+
     SolrParams params = args.toSolrParams();
     RequiredSolrParams required = params.required();
-    outputField = required.get(OUTPUT_FIELD_PARAM);
     modelName = required.get(MODEL_NAME);
 
     String inlinePrompt = params.get(PROMPT);
diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/not-a-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/not-a-chat-model.json
new file mode 100644
index 000000000000..814a31c92e76
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/modelChatExamples/not-a-chat-model.json
@@ -0,0 +1,7 @@
+{
+  "class": "com.example.NonExistentChatModel",
+  "name": "not-a-chat-model-1",
+  "params": {
+    "apiKey": "test-api-key"
+  }
+}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java
index d7a4ac9b8c96..ca219b5ddbe3 100644
--- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java
@@ -127,6 +127,18 @@ public static void loadChatModel(String fileName, String status) throws Exceptio
         ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==" + status);
   }
 
+  public static void loadChatModel(String fileName, String status, String message)
+      throws Exception {
+    final URL url = TestLanguageModelBase.class.getResource("/modelChatExamples/" + fileName);
+    final String model = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8);
+
+    assertJPut(
+        ManagedChatModelStore.REST_END_POINT,
+        model,
+        "/responseHeader/status==" + status,
+        message);
+  }
+
   public static void loadChatModel(String fileName) throws Exception {
     final URL url = TestLanguageModelBase.class.getResource("/modelChatExamples/" + fileName);
     final String model = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8);
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java
index 25880eecbcd6..2e76622f4aa3 100644
--- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java
@@ -223,7 +223,18 @@ public void loadChatModel_gemini_shouldLoadModelConfig() throws Exception {
 
   @Test
   public void loadChatModel_dummyUnsupportedParam_shouldRaiseError() throws Exception {
-    loadChatModel("dummy-chat-model-unsupported.json", "400");
+    loadChatModel(
+        "dummy-chat-model-unsupported.json",
+        "400",
+        "/error/msg=='Model loading failed for org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel'");
+  }
+
+  @Test
+  public void loadChatModel_notAChatModel_shouldRaiseError() throws Exception {
+    loadChatModel(
+        "not-a-chat-model.json",
+        "400",
+        "/error/msg=='Model loading failed for com.example.NonExistentChatModel'");
   }
 
   @Test
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
index d2f52bb3f554..ca31c7b46418 100644
--- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java
@@ -114,7 +114,21 @@ public void init_nullOutputField_shouldThrowExceptionWithDetailedMessage() {
     DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
 
     SolrException e = assertThrows(SolrException.class, () -> factory.init(args));
-    assertEquals("Missing required parameter: outputField", e.getMessage());
+    assertEquals("Exactly one 'outputField' must be provided", e.getMessage());
+  }
+
+  @Test
+  public void init_moreThanOneOutputField_shouldThrowExceptionWithDetailedMessage() {
+    NamedList args = new NamedList<>();
+    args.add("inputField", "string_field");
+    args.add("outputField", "enriched_field");
+    args.add("outputField", "body_field");
+    args.add("prompt", "Summarize: {string_field}");
+    args.add("model", "model1");
+
+    DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory();
+    SolrException e = assertThrows(SolrException.class, () -> factory.init(args));
+    assertEquals("Only one 'outputField' can be provided, but found: [enriched_field, body_field]", e.getMessage());
   }
 
   @Test
diff --git a/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc b/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc
index c2377045ce1d..a968851e01bc 100644
--- a/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc
+++ b/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc
@@ -421,6 +421,10 @@ The {solr-javadocs}/modules/language-models/index.html[`language-models`] module
 It uses external text to vectors language models to perform the vectorisation for each processed document.
 For more information: xref:query-guide:text-to-vector.adoc[Update Request Processor]
 
+{solr-javadocs}/modules/language-models/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.html[DocumentEnrichmentUpdateProcessorFactory]:: Update processor which, starting from one or more fields in input and a given prompt, adds the output of an LLM as the value of a new field.
+It uses external chat language models to perform the enrichment of each processed document.
+For more information: xref:indexing-guide:document-enrichment-with-llms.adoc[Document Enrichment documentation]
+
 The {solr-javadocs}/modules/langid/index.html[`langid`] module provides::
 
 {solr-javadocs}/modules/langid/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.html[LangDetectLanguageIdentifierUpdateProcessorFactory]::: Identifies the language of a set of input fields using http://code.google.com/p/language-detection.
diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc
index fd8ef8a40f7d..a03ded2adc5d 100644
--- a/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc
+++ b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc
@@ -18,11 +18,12 @@
 
 This module brings the power of *Large Language Models* to Solr.
 
-More specifically, it provides the capability, at indexing time, given a prompt and a set of input fields, of calling an
-LLM through https://github.com/langchain4j/langchain4j[LangChain4j] for each document and store the result of the call
-in an `outputField`, that can be of multiple types and even multivalued.
+More specifically, it enables calling an LLM at indexing time to enrich documents with additional/generated/extracted
+data. Given a prompt and a set of input fields, for each document, the LLM is invoked through
+https://github.com/langchain4j/langchain4j[LangChain4j], and the result is stored in an `outputField`, which can support
+multiple types and may also be multivalued.
 
-_Without_ this module, the LLM calls must be done _outside_ Solr, before indexing.
+_Without_ this module, the LLM calls to enrich documents must be done _outside_ Solr, before indexing.
 
 [IMPORTANT]
 ====
@@ -32,7 +33,7 @@ diligently examined before employing this module in a serious way.
 
 ====
 
-At the moment a subset of LLM providers supported by LangChain4j is supported by Solr.
+At the moment, Solr supports a subset of the LLM providers available in LangChain4j.
 
 *Disclaimer*: Apache Solr is *in no way* affiliated to any of these corporations or services.
 
@@ -69,19 +70,13 @@ See xref:configuration-guide:solr-modules.adoc[Solr Module] for more details.
   
  
 ----
-[NOTE]
-====
-If no component is configured in `solrconfig.xml`, the `ChatModel` store will not be registered and requests to
-`/schema/chat-model-store` will return an error.
-====
 
-== Chat Model Configuration
+== Chat Model Setup
 
 === Models
 
-* A model in this module is a chat model, that answers with text given a prompt.
-* A model in this Solr module is a reference to an external API that runs the Large Language Model responsible for chat
-completion.
+* A model is a chat model that generates a text response given a prompt.
+* A model is a reference to an external API that runs the Large Language Model responsible for chat completion.
 
 [IMPORTANT]
 ====
@@ -279,24 +274,84 @@ http://localhost:8983/solr/YOUR_COLLECTION/schema/chat-model-store
 ----
 
 == How to Trigger Document Enrichment during Indexing
-To create new fields starting from existent ones in your documents at indexing time you need to configure an {solr-javadocs}/core/org/apache/solr/update/processor/UpdateRequestProcessorChain.html[Update Request Processor Chain] that includes at least one `DocumentEnrichmentUpdateProcessor` update request processor in one of the 2 following way:
+To create new fields from existing document fields at indexing time, configure an
+{solr-javadocs}/core/org/apache/solr/update/processor/UpdateRequestProcessorChain.html[UpdateRequestProcessorChain] that
+includes at least one `DocumentEnrichmentUpdateProcessor` update request processor.
+
+Several parameters must be defined:
+
+`inputField`::
++
+[%autowidth,frame=none]
+|===
+s|Required |Default: none
+|===
++
+One (or more) `inputField` needs to be injected in the prompt. This is done by some special tokens, that are the
+`fieldName` surrounded by curly brackets (e.g., `{string_field}`, in the example at the
+xref:document-enrichment-with-llms.adoc#minimum-requirements[top of the page]). These tokens are _mandatory_ for this
+module to work properly. Solr will throw an error if the parameters are not properly defined.
+For example, either the prompt or the content of the file `prompt.txt` must contain the text '{string_field}', which
+will be substituted with the content of the `string_field` field for each document. An example of a valid prompt with
+multiple input fields is as follows:
 
-* Update processor with parameter `prompt`
 +
 [source,xml]
 ----
 
   
-   string_field
+   title
+   body
    summary
-   Summarize this content: {string_field}
-   model-name
+   Summarize with the following information. Title: {title}. Body: {body}.
+   chat-model
   
   
  
 ----
 
-* Update processor with parameter `promptFile`: in this case, the file `prompt.txt` must be uploaded to Solr similarly to any other configuration file (e.g., `solrconfig.xml`, `synonyms.txt`, etc.)
++
+Multiple `inputField` values can also be defined using the following notation:
+
++
+[source,xml]
+----
+
+    title
+    body
+
+----
+
+
+`outputField`::
++
+[%autowidth,frame=none]
+|===
+s|Required |Default: none
+|===
++
+The LLM response is mapped to the specified `outputField`, and only one field is supported as output. Note that this
+module only supports a subset of Solr's available field types, which includes:
+
+* *String/Text*: `StrField`, `TextField`, `SortableTextField`
+* *Date*: `DatePointField` (the LLM must return an ISO-8601 date string; it might be useful to tune your prompt accordingly, to avoid indexing errors)
+* *Numeric*: `IntPointField`, `LongPointField`, `FloatPointField`, `DoublePointField`
+* *Boolean*: `BoolField`
+
+
+These fields _can_ be multivalued. Solr uses structured output from LangChain4j to deal with LLMs' responses.
+
+
+`prompt` or `promptFile`::
++
+[%autowidth,frame=none]
+|===
+s|Exactly one of these parameters is required |Default: none
+|===
++
+These parameters can be defined in the following ways:
+
+* Update processor definition with the `prompt` parameter
 +
 [source,xml]
 ----
@@ -304,57 +359,39 @@ To create new fields starting from existent ones in your documents at indexing t
   
    string_field
    summary
-   prompt.txt
+   Summarize this content: {string_field}
    model-name
   
   
  
 ----
 
-Exactly one of the following parameters is required: `prompt` or `promptFile`.
-
-Another important feature of this module is that one (or more) `inputField` needs to be injected in the prompt. This is
-done by some special tokens, that are the `fieldName` surrounded by curly brackets (e.g., `{string_field}`, in the
-example above). These tokens are _mandatory_ for this module to work properly. Solr will throw an error if the
-parameters are not properly defined.
-For example, both the prompt and the content of the file prompt.txt, must contain the text '{string_field}', which
-will be substituted with the content of the `string_field` field for each document. An example of a valid prompt with
-multiple input fields is as follows:
-
+* Update processor definition with the `promptFile` parameter: in this case, the file `prompt.txt` must be
+uploaded to Solr inside the config folder of the collection (e.g., similarly to `solrconfig.xml`, `synonyms.txt`, etc.)
++
 [source,xml]
 ----
 
   
-   title
-   body
+   string_field
    summary
-   Summarize with the following information. Title: {title}. Body: {body}.
-   chat-model
+   prompt.txt
+   model-name
   
   
  
 ----
 
-Another way of using more than one `inputField` is by using the following notation, instead of more than one parameter
-with the same name:
-[source,xml]
-----
-
-    title
-    body
-
-----
-
-The LLM response is mapped to the specified `outputField`. Note that this module only supports a subset of Solr's
-available field types, which includes:
-
-* *String/Text*: `StrField`, `TextField`, `SortableTextField`
-* *Date*: `DatePointField` (the LLM must return an ISO-8601 date string; it might be useful to tune your prompt accordingly, to avoid indexing errors)
-* *Numeric*: `IntPointField`, `LongPointField`, `FloatPointField`, `DoublePointField`
-* *Boolean*: `BoolField`
+`model`::
++
+[%autowidth,frame=none]
+|===
+s|Required |Default: none
+|===
++
 
+The name of the model that will be uploaded via REST (as shown above).
 
-These fields _can_ be multivalued. Solr uses structured output from LangChain4j to deal with LLMs' responses.
 
 
 For more details on how to work with update request processors in Apache Solr, please refer to the dedicated page:
@@ -379,8 +416,8 @@ Monitor your indexing logs to detect documents that were not enriched as expecte
 ====
 
 === Index first and enrich your documents on a second pass
-LLM calls are usually quite slow, so, depending on your use case it could be a good idea to index first your documents
-enrich them with new LLM-generated fields later on.
+LLM calls are typically slow, so depending on your use case, it may be preferable to first index your documents and
+enrich them with LLM-generated fields at a later stage.
 
 This can be done in Solr defining two update request processors chains: one that includes all the processors you need,
 excluding the `DocumentEnrichmentUpdateProcessor` (let's call it 'no-enrichment') and one that includes the
@@ -498,3 +535,9 @@ boolean `enriched` field to `true`.
 
 Faceting or querying on the boolean `enriched` field can also give you a quick idea on how many documents have been
 enriched with the new generated fields.
+
+[NOTE]
+====
+To learn about the ways to target an `updateRequestProcessorChain` other than the default one, see the
+section on xref:configuration-guide:update-request-processors.adoc#using-custom-chains[Using Custom Chains].
+====

From b092f22ddf111fff9b3623ace0ed33054c70e399 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= 
Date: Wed, 15 Apr 2026 17:22:47 +0200
Subject: [PATCH 14/17] [llm-document-enrichment] Updated documentation

---
 .../pages/document-enrichment-with-llms.adoc  | 285 +++++++++---------
 1 file changed, 138 insertions(+), 147 deletions(-)

diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc
index a03ded2adc5d..8fd395117b25 100644
--- a/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc
+++ b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc
@@ -20,7 +20,7 @@ This module brings the power of *Large Language Models* to Solr.
 
 More specifically, it enables calling an LLM at indexing time to enrich documents with additional/generated/extracted
 data. Given a prompt and a set of input fields, for each document, the LLM is invoked through
-https://github.com/langchain4j/langchain4j[LangChain4j], and the result is stored in an outputField, which can support
+https://github.com/langchain4j/langchain4j[LangChain4j], and the result is stored in an output field, which can support
 multiple types and may also be multivalued.
 
 _Without_ this module, the LLM calls to enrich documents must be done _outside_ Solr, before indexing.
@@ -56,7 +56,87 @@ Language Models is a module and therefore its plugins must be configured in `sol
 * Enable the `language-models` module to make the Language Models classes available on Solr's classpath.
 See xref:configuration-guide:solr-modules.adoc[Solr Module] for more details.
 
-* An update processor, similar to the one below, must be declared in `solrconfig.xml`:
+* An {solr-javadocs}/core/org/apache/solr/update/processor/UpdateRequestProcessorChain.html[UpdateRequestProcessorChain]
+that includes at least one `DocumentEnrichmentUpdateProcessor` update processor.
+
+=== Update Processor Chain Design
+
+To properly design the Update Processor Chain for Document Enrichment, several parameters must be defined:
+
+`inputField`::
++
+[%autowidth,frame=none]
+|===
+s|Required |Default: none
+|===
++
+The field whose content is passed to the LLM to enrich the documents. Every `inputField` declared must be referred to in
+the prompt.
+
++
+Multiple `inputField` elements are supported and can be defined using one of the following notations:
+
+* Add more than one `inputField` string element
++
+[source,xml]
+----
+
+  
+   title
+   body
+   summary
+   Summarize with the following information. Title: {title}. Body: {body}.
+   chat-model
+  
+  
+ 
+----
+
+* Substitute the `inputField` string element with an array of string elements with the same name
++
+[source,xml]
+----
+
+    title
+    body
+
+----
+
+
+`outputField`::
++
+[%autowidth,frame=none]
+|===
+s|Required |Default: none
+|===
++
+The LLM response is mapped to the specified `outputField`, and only one field is supported as output. Note that this
+module only supports a subset of Solr's available field types, which includes:
+
+* *String/Text*: `StrField`, `TextField`, `SortableTextField`
+* *Date*: `DatePointField` (the LLM must return an ISO-8601 date string; it might be useful to tune your prompt accordingly, to avoid indexing errors)
+* *Numeric*: `IntPointField`, `LongPointField`, `FloatPointField`, `DoublePointField`
+* *Boolean*: `BoolField`
+
+
+These fields _can_ be multivalued. Solr uses structured output from LangChain4j to deal with LLMs' responses.
+
+
+`prompt` or `promptFile`::
++
+[%autowidth,frame=none]
+|===
+s|Exactly one of these parameters is required |Default: none
+|===
++
+Two different ways to define a prompt are available: one directly in the solrconfig and one through a dedicated file.
+Either way, the content of the prompt _must_ contain a special token for each declared `inputField`: the field name
+surrounded by curly brackets (e.g., `{string_field}` in the example below). Solr will throw an error if
+the parameters are not properly defined.
++
+These parameters can be defined in one of the following ways:
+
+* Update processor definition with the `prompt` parameter
 +
 [source,xml]
 ----
@@ -71,6 +151,55 @@ See xref:configuration-guide:solr-modules.adoc[Solr Module] for more details.
  
 ----
 
+* Update processor definition with the `promptFile` parameter: in this case, the file `prompt.txt` must be
+uploaded to Solr inside the config folder of the collection (e.g., similarly to `solrconfig.xml`, `synonyms.txt`, etc.)
++
+[source,xml]
+----
+
+  
+   string_field
+   summary
+   prompt.txt
+   model-name
+  
+  
+ 
+----
+
+`model`::
++
+[%autowidth,frame=none]
+|===
+s|Required |Default: none
+|===
++
+
+The name of the model that will be uploaded via REST. See xref:document-enrichment-with-llms.adoc#chat-model-setup[] for
+more information.
+
+
+For more details on how to work with update request processors in Apache Solr, please refer to the dedicated page:
+xref:configuration-guide:update-request-processors.adoc[Update Request Processor]
+
+[IMPORTANT]
+====
+This update processor sends your document field content off to some hosted service on the internet.
+There are serious performance implications that should be diligently examined before employing this component in production.
+It will substantially slow down your indexing pipeline, so make sure to stress test your solution before going live.
+
+====
+
+[NOTE]
+====
+If any `inputField` value is absent or empty for a given document, enrichment is silently skipped for that document:
+the `outputField` is not added and the document is indexed as-is.
+
+If the LLM call fails at runtime (e.g., network error, model timeout), the exception is caught and logged but is
+*non-fatal*: the document is still indexed without the `outputField`.
+Monitor your indexing logs to detect documents that were not enriched as expected.
+====
+
 == Chat Model Setup
 
 === Models
@@ -80,7 +209,7 @@ See xref:configuration-guide:solr-modules.adoc[Solr Module] for more details.
 
 [IMPORTANT]
 ====
-The Solr chat model specifies the parameters to access the APIs, the LLM doesn't run internally in Solr
+The Solr chat model specifies the parameters to access the APIs; the LLM doesn't run internally in Solr.
 
 ====
 
@@ -110,7 +239,8 @@ Accepted values:
 s|Required |Default: none
 |===
 +
-The identifier of your model, this is used by any component that intends to use the model (e.g., `DocumentEnrichmentUpdateProcessorFactory` update processor).
+The identifier of your model. It is used by any component that intends to use the model
+(e.g., `DocumentEnrichmentUpdateProcessorFactory` update processor).
 
 `params`::
 +
@@ -120,7 +250,9 @@ The identifier of your model, this is used by any component that intends to use
 |===
 +
 Each model class has potentially different params.
-Many are shared but for the full set of parameters of the model you are interested in please refer to the official documentation of the LangChain4j version included in Solr: https://docs.langchain4j.dev/category/language-models[Chat Models in LangChain4j].
+Many are shared but for the full set of parameters of the model you are interested in please refer to the official
+documentation of the LangChain4j version included in Solr:
+https://docs.langchain4j.dev/category/language-models[Chat Models in LangChain4j].
 
 === Supported Models
 Apache Solr uses https://github.com/langchain4j/langchain4j[LangChain4j] to support document enrichment with LLMs.
@@ -273,149 +405,8 @@ http://localhost:8983/solr/YOUR_COLLECTION/schema/chat-model-store
 }
 ----
 
-== How to Trigger Document Enrichment during Indexing
-To create new fields from existing document fields at indexing time, configure an
-{solr-javadocs}/core/org/apache/solr/update/processor/UpdateRequestProcessorChain.html[UpdateRequestProcessorChain] that
-includes at least one DocumentEnrichmentUpdateProcessor update request processor.
-
-Several parameters must be defined:
-
-`inputField`::
-+
-[%autowidth,frame=none]
-|===
-s|Required |Default: none
-|===
-+
-One (or more) `inputField` needs to be injected in the prompt. This is  done by some special tokens, that are the
-`fieldName` surrounded by curly brackets (e.g., `{string_field}`, in the example at the
-xref:document-enrichment-with-llms.adoc#minimum-requirements[top of the page]). These tokens are _mandatory_ for this
-module to work properly. Solr will throw an error if the parameters are not properly defined.
-For example, both the prompt or the content of the file `prompt.txt`, must contain the text '{string_field}', which
-will be substituted with the content of the `string_field` field for each document. An example of a valid prompt with
-multiple input fields is as follows:
-
-+
-[source,xml]
-----
-
-  
-   title
-   body
-   summary
-   Summarize with the following information. Title: {title}. Body: {body}.
-   chat-model
-  
-  
- 
-----
-
-+
-Multiple `inputField` could also be defined by using the following notation:
-
-+
-[source,xml]
-----
-
-    title
-    body
-
-----
-
-
-`outputField`::
-+
-[%autowidth,frame=none]
-|===
-s|Required |Default: none
-|===
-+
-The LLM response is mapped to the specified `outputField`, and only one field is supported as output. Note that this
-module only supports a subset of Solr's available field types, which includes:
-
-* *String/Text*: `StrField`, `TextField`, `SortableTextField`
-* *Date*: `DatePointField` (the LLM must return an ISO-8601 date string; it might be useful to tune your prompt accordingly, to avoid indexing errors)
-* *Numeric*: `IntPointField`, `LongPointField`, `FloatPointField`, `DoublePointField`
-* *Boolean*: `BoolField`
-
-
-These fields _can_ be multivalued. Solr uses structured output from LangChain4j to deal with LLMs' responses.
-
-
-`prompt` or `promptFile`::
-+
-[%autowidth,frame=none]
-|===
-s|Exactly one of these parameters is required |Default: none
-|===
-+
-These parameters can be defined in the following ways:
-
-* Update processor definition with the `prompt` parameter
-+
-[source,xml]
-----
-
-  
-   string_field
-   summary
-   Summarize this content: {string_field}
-   model-name
-  
-  
- 
-----
-
-* Update processor definition with the parameter `promptFile` parameter: in this case, the file `prompt.txt` must be
-uploaded to Solr inside the config folder of the collection (e.g., similarly to `solrconfig.xml`, `synonyms.txt`, etc.)
-+
-[source,xml]
-----
-
-  
-   string_field
-   summary
-   prompt.txt
-   model-name
-  
-  
- 
-----
-
-`model`::
-+
-[%autowidth,frame=none]
-|===
-s|Required |Default: none
-|===
-+
-
-The name of the model that will be uploaded via REST (as shown above).
-
-
-
-For more details on how to work with update request processors in Apache Solr, please refer to the dedicated page:
-xref:configuration-guide:update-request-processors.adoc[Update Request Processor]
-
-[IMPORTANT]
-====
-This update processor sends your document field content off to some hosted service on the internet.
-There are serious performance implications that should be diligently examined before employing this component in production.
-It will slow down substantially your indexing pipeline so make sure to stress test your solution before going live.
-
-====
-
-[NOTE]
-====
-If any `inputField` value is absent or empty for a given document, enrichment is silently skipped for that document:
-the `outputField` is not added and the document is indexed as-is.
-
-If the LLM call fails at runtime (e.g., network error, model timeout), the exception is caught and logged but is
-*non-fatal*: the document is still indexed without the `outputField`.
-Monitor your indexing logs to detect documents that were not enriched as expected.
-====
 
-=== Index first and enrich your documents on a second pass
+== Index First and Enrich your Documents on a Second Pass
 LLM calls are typically slow, so depending on your use case, it may be preferable to first index your documents and
 enrich them with LLM-generated fields at a later stage.
 

From 4f39e40dfe0c487a89b4e44eef10c8eaa9719f84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= 
Date: Fri, 17 Apr 2026 11:18:53 +0200
Subject: [PATCH 15/17] [llm-document-enrichment] Pre-check changes

---
 .../langchain4j-anthropic-1.9.1.jar.sha1      |   1 +
 ...angchain4j-google-ai-gemini-1.9.1.jar.sha1 |   1 +
 .../langchain4j-ollama-1.9.1.jar.sha1         |   1 +
 .../model/SolrChatModel.java                  |  19 +-
 .../store/ChatModelStore.java                 |   3 +-
 .../store/rest/ManagedChatModelStore.java     |   5 +-
 .../DocumentEnrichmentUpdateProcessor.java    |   3 +-
 ...umentEnrichmentUpdateProcessorFactory.java |  17 +-
 .../solr/collection1/conf/enumsConfig.xml     |  16 ++
 .../languagemodels/TestLanguageModelBase.java |  11 +-
 .../model/DummyChatModelTest.java             |   2 +-
 .../store/rest/TestChatModelManager.java      |  22 +-
 .../rest/TestChatModelManagerPersistence.java |  15 +-
 ...stManagedChatModelStoreInitialization.java |   6 +-
 ...tEnrichmentUpdateProcessorFactoryTest.java | 194 ++++++++-----
 ...DocumentEnrichmentUpdateProcessorTest.java | 260 +++++++++++++-----
 16 files changed, 382 insertions(+), 194 deletions(-)
 create mode 100644 solr/licenses/langchain4j-anthropic-1.9.1.jar.sha1
 create mode 100644 solr/licenses/langchain4j-google-ai-gemini-1.9.1.jar.sha1
 create mode 100644 solr/licenses/langchain4j-ollama-1.9.1.jar.sha1

diff --git a/solr/licenses/langchain4j-anthropic-1.9.1.jar.sha1 b/solr/licenses/langchain4j-anthropic-1.9.1.jar.sha1
new file mode 100644
index 000000000000..83fcaa412e36
--- /dev/null
+++ b/solr/licenses/langchain4j-anthropic-1.9.1.jar.sha1
@@ -0,0 +1 @@
+905f570cd38c2ebd94fa159f2e88bca06bbf71c8
diff --git a/solr/licenses/langchain4j-google-ai-gemini-1.9.1.jar.sha1 b/solr/licenses/langchain4j-google-ai-gemini-1.9.1.jar.sha1
new file mode 100644
index 000000000000..41991a7dac6f
--- /dev/null
+++ b/solr/licenses/langchain4j-google-ai-gemini-1.9.1.jar.sha1
@@ -0,0 +1 @@
+49973974543318de23f3f09b21c79d5e45815d8c
diff --git a/solr/licenses/langchain4j-ollama-1.9.1.jar.sha1 b/solr/licenses/langchain4j-ollama-1.9.1.jar.sha1
new file mode 100644
index 000000000000..8a1828b24320
--- /dev/null
+++ b/solr/licenses/langchain4j-ollama-1.9.1.jar.sha1
@@ -0,0 +1 @@
+e3c631fa2fce6e79ad50fc86adb724656992263a
diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java
index 542985a16e61..c306afe84b32 100644
--- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java
@@ -37,9 +37,9 @@
 import org.slf4j.LoggerFactory;
 
 /**
- * This object wraps a {@link dev.langchain4j.model.chat.ChatModel} to produce the content of a field based on the
- * content of other fields specified as input. It's meant to be used as a managed resource with the {@link
- * ManagedChatModelStore}
+ * This object wraps a {@link dev.langchain4j.model.chat.ChatModel} to produce the content of a
+ * field based on the content of other fields specified as input. It's meant to be used as a managed
+ * resource with the {@link ManagedChatModelStore}
  */
 public class SolrChatModel implements Accountable {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -90,9 +90,9 @@ public static SolrChatModel getInstance(
            */
           switch (paramName) {
             case TIMEOUT_PARAM -> builder
-                  .getClass()
-                  .getMethod(paramName, Duration.class)
-                  .invoke(builder, Duration.ofSeconds((Long) params.get(paramName)));
+                .getClass()
+                .getMethod(paramName, Duration.class)
+                .invoke(builder, Duration.ofSeconds((Long) params.get(paramName)));
 
             case MAX_RETRIES_PARAM, THINKING_BUDGET_TOKENS, RANDOM_SEED -> builder
                 .getClass()
@@ -134,8 +134,7 @@ public static SolrChatModel getInstance(
     }
   }
 
-  public SolrChatModel(
-      String name, ChatModel chatModel, Map params) {
+  public SolrChatModel(String name, ChatModel chatModel, Map params) {
     this.name = name;
     this.chatModel = chatModel;
     this.params = params;
@@ -146,8 +145,8 @@ public SolrChatModel(
    * Sends a structured chat request and returns the parsed value from the {@code {"value": ...}}
    * JSON object that the model is instructed to produce via {@code responseFormat}.
    *
-   * @return the extracted value: a {@link String}, {@link Number}, {@link Integer}, {@link Boolean}, or {@link
-   *     java.util.List} depending on the Solr output field type
+   * @return the extracted value: a {@link String}, {@link Number}, {@link Integer}, {@link
+   *     Boolean}, or {@link java.util.List} depending on the Solr output field type
    */
   public Object chat(String prompt, ResponseFormat responseFormat) {
     ChatRequest chatRequest =
diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelStore.java
index 96105919c17d..82d215bf857e 100644
--- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelStore.java
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelStore.java
@@ -42,8 +42,7 @@ public void clear() {
 
   public List getModels() {
     synchronized (availableModels) {
-      final List availableModelsValues =
-          new ArrayList<>(availableModels.values());
+      final List availableModelsValues = new ArrayList<>(availableModels.values());
       return Collections.unmodifiableList(availableModelsValues);
     }
   }
diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java
index a4e15d206799..217ebb737295 100644
--- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java
@@ -78,9 +78,7 @@ public static ManagedChatModelStore getManagedModelStore(SolrCore core) {
    * @return the available models as a list of Maps objects
    */
   private static List modelsAsManagedResources(List models) {
-    return models.stream()
-        .map(ManagedChatModelStore::toModelMap)
-        .collect(Collectors.toList());
+    return models.stream().map(ManagedChatModelStore::toModelMap).collect(Collectors.toList());
   }
 
   @SuppressWarnings("unchecked")
@@ -111,7 +109,6 @@ public ManagedChatModelStore(
     store = new ChatModelStore();
   }
 
-
   @Override
   protected void onManagedDataLoadedFromStorage(NamedList managedInitArgs, Object managedData)
       throws SolrException {
diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java
index 3f90fd8eb580..df591fa7c296 100644
--- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java
@@ -82,7 +82,8 @@ public void processAdd(AddUpdateCommand cmd) throws IOException {
     }
 
     try {
-      // as for now, only a plain text as prompt is sent to the model (no support for tools/skills/agents)
+      // as for now, only a plain text as prompt is sent to the model (no support for
+      // tools/skills/agents)
       // chatModel.chat returns the parsed value from the structured JSON response
       Object value = chatModel.chat(injectedPrompt, responseFormat);
       if (multiValued && value instanceof List list) {
diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java
index 7daafe4f1561..fdcec99a2097 100644
--- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java
@@ -140,8 +140,7 @@ public void init(final NamedList args) {
     Collection fieldNames = args.removeConfigArgs(INPUT_FIELD_PARAM);
     if (fieldNames.isEmpty()) {
       throw new SolrException(
-          SolrException.ErrorCode.SERVER_ERROR,
-          "At least one 'inputField' must be provided");
+          SolrException.ErrorCode.SERVER_ERROR, "At least one 'inputField' must be provided");
     }
     inputFields = List.copyOf(fieldNames);
 
@@ -166,8 +165,7 @@ public void init(final NamedList args) {
 
     if (inlinePrompt == null && promptFilePath == null) {
       throw new SolrException(
-          SolrException.ErrorCode.SERVER_ERROR,
-          "Either 'prompt' or 'promptFile' must be provided");
+          SolrException.ErrorCode.SERVER_ERROR, "Either 'prompt' or 'promptFile' must be provided");
     }
     if (inlinePrompt != null && promptFilePath != null) {
       throw new SolrException(
@@ -190,9 +188,7 @@ public void inform(SolrCore core) {
         promptText = new String(is.readAllBytes(), StandardCharsets.UTF_8).trim();
       } catch (IOException e) {
         throw new SolrException(
-            SolrException.ErrorCode.SERVER_ERROR,
-            "Cannot read prompt file: " + promptFile,
-            e);
+            SolrException.ErrorCode.SERVER_ERROR, "Cannot read prompt file: " + promptFile, e);
       }
       validatePromptPlaceholders(promptText, inputFields);
     }
@@ -244,8 +240,8 @@ public UpdateRequestProcessor getInstance(
    * is wrapped in a {@link JsonArraySchema} nested inside the root {@link JsonObjectSchema}.
    *
    * 

Nesting {@link JsonArraySchema} inside a {@link JsonObjectSchema} property is supported by - * all langchain4j providers that implement structured outputs with {@link JsonObjectSchema} (OpenAI, Azure OpenAI, - * Google AI, Gemini, Mistral, Ollama, Amazon Bedrock, Watsonx). + * all langchain4j providers that implement structured outputs with {@link JsonObjectSchema} + * (OpenAI, Azure OpenAI, Google AI, Gemini, Mistral, Ollama, Amazon Bedrock, Watsonx). */ static ResponseFormat buildResponseFormat(SchemaField schemaField) { JsonSchemaElement valueElement = toJsonSchemaElement(schemaField.getType()); @@ -315,7 +311,8 @@ private static void validatePromptPlaceholders(String prompt, List field if (!placeholdersInPromptWithoutField.isEmpty()) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, - "prompt contains placeholders not declared as inputField(s): " + placeholdersInPromptWithoutField); + "prompt contains placeholders not declared as inputField(s): " + + placeholdersInPromptWithoutField); } } diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/enumsConfig.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/enumsConfig.xml index 7292b9204753..b9b364d72dfc 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/enumsConfig.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/enumsConfig.xml @@ -1,4 +1,20 @@ + diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java index a8420feef51e..a4d8f5432ef3 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java @@ -123,8 +123,7 @@ public static void loadChatModel(String fileName, String status) throws Exceptio final URL 
url = TestLanguageModelBase.class.getResource("/modelChatExamples/" + fileName); final String model = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8); - assertJPut( - ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==" + status); + assertJPut(ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==" + status); } public static void loadChatModel(String fileName, String status, String message) @@ -133,18 +132,14 @@ public static void loadChatModel(String fileName, String status, String message) final String model = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8); assertJPut( - ManagedChatModelStore.REST_END_POINT, - model, - "/responseHeader/status==" + status, - message); + ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==" + status, message); } public static void loadChatModel(String fileName) throws Exception { final URL url = TestLanguageModelBase.class.getResource("/modelChatExamples/" + fileName); final String model = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8); - assertJPut( - ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==0"); + assertJPut(ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==0"); } protected static void prepareIndex() throws Exception { diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModelTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModelTest.java index 6449b7b2f55c..4ed388ac4767 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModelTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModelTest.java @@ -45,4 +45,4 @@ public void constructAndChat() throws Exception { .aiMessage() .text()); } -} \ No newline at end of file +} diff --git 
a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java index 2e76622f4aa3..b603250fdd6d 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java @@ -127,11 +127,8 @@ public void loadChatModel_openAi_shouldLoadModelConfig() throws Exception { assertJQ( ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/baseUrl=='https://api.openai.com/v1'"); - assertJQ( - ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'"); - assertJQ( - ManagedChatModelStore.REST_END_POINT, - "/models/[0]/params/modelName=='gpt-5.4-nano'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); @@ -149,8 +146,7 @@ public void loadChatModel_mistralAi_shouldLoadModelConfig() throws Exception { assertJQ( ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/baseUrl=='https://api.mistral.ai/v1'"); - assertJQ( - ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-mistralAI'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-mistralAI'"); assertJQ( ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='mistral-small-latest'"); @@ -171,8 +167,7 @@ public void 
loadChatModel_anthropic_shouldLoadModelConfig() throws Exception { assertJQ( ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/baseUrl=='https://api.anthropic.com/v1'"); - assertJQ( - ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-anthropic'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-anthropic'"); assertJQ( ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='claude-3-5-haiku-latest'"); @@ -193,8 +188,7 @@ public void loadChatModel_ollama_shouldLoadModelConfig() throws Exception { assertJQ( ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/baseUrl=='http://localhost:11434'"); - assertJQ( - ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='llama3.2'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='llama3.2'"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); @@ -208,11 +202,9 @@ public void loadChatModel_gemini_shouldLoadModelConfig() throws Exception { final String modelName = "gemini-chat-1"; assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-gemini'"); assertJQ( - ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-gemini'"); - assertJQ( - ManagedChatModelStore.REST_END_POINT, - "/models/[0]/params/modelName=='gemini-2.0-flash'"); + ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gemini-2.0-flash'"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); assertJQ(ManagedChatModelStore.REST_END_POINT, 
"/models/[0]/params/logResponses==true"); diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java index 0da79ce23a29..329de3fa70b5 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java @@ -60,10 +60,8 @@ public void testModelStorePersistence() throws Exception { assertJQ( ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/baseUrl=='https://api.openai.com/v1'"); - assertJQ( - ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'"); - assertJQ( - ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); @@ -75,10 +73,8 @@ public void testModelStorePersistence() throws Exception { assertJQ( ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/baseUrl=='https://api.openai.com/v1'"); - assertJQ( - ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'"); - assertJQ( - ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'"); + 
assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); @@ -88,8 +84,7 @@ public void testModelStorePersistence() throws Exception { getJetty().stop(); getJetty().start(); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); - assertJQ( - ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); // delete model and verify persistence of the empty state restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName); diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java index 14ee4251efe0..6c1c690eb8b0 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java @@ -30,11 +30,7 @@ public void cleanUp() throws Exception { @Test public void managedChatModelStore_whenUpdateRequestComponentConfigured_shouldBeInitialized() throws Exception { - setupTest( - "solrconfig-document-enrichment.xml", - "schema-language-models.xml", - false, - false); + setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, false); assertJQ(ManagedChatModelStore.REST_END_POINT, "/responseHeader/status==0"); 
assertJQ(ManagedChatModelStore.REST_END_POINT, "/models==[]"); diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java index ca31c7b46418..6b4bf984fa88 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java @@ -19,7 +19,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.Objects; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.NamedList; @@ -61,7 +60,8 @@ public void after() { @Test public void init_fullArgs_shouldInitAllParams() { - DocumentEnrichmentUpdateProcessorFactory factory = initializeUpdateProcessorFactory(List.of("string_field"), "enriched_field", null, "model1"); + DocumentEnrichmentUpdateProcessorFactory factory = + initializeUpdateProcessorFactory(List.of("string_field"), "enriched_field", null, "model1"); assertEquals(List.of("string_field"), factory.getInputFields()); assertEquals("enriched_field", factory.getOutputField()); @@ -71,7 +71,9 @@ public void init_fullArgs_shouldInitAllParams() { @Test public void init_multipleInputFields_shouldInitAllFields() { - DocumentEnrichmentUpdateProcessorFactory factory = initializeUpdateProcessorFactory(List.of("string_field", "body_field"), "enriched_field", null, "model1"); + DocumentEnrichmentUpdateProcessorFactory factory = + initializeUpdateProcessorFactory( + List.of("string_field", "body_field"), "enriched_field", null, "model1"); 
assertEquals(List.of("string_field", "body_field"), factory.getInputFields()); } @@ -84,7 +86,8 @@ public void init_arrInputField_shouldInitAllFields() { args.add("prompt", "Title: {string_field}. Body: {body_field}."); args.add("model", "model1"); - DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); factory.init(args); assertEquals(List.of("string_field", "body_field"), factory.getInputFields()); @@ -98,7 +101,8 @@ public void init_noInputField_shouldThrowExceptionWithDetailedMessage() { args.add("prompt", "Summarize: {string_field}."); args.add("model", "model1"); - DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); assertEquals("At least one 'inputField' must be provided", e.getMessage()); @@ -111,7 +115,8 @@ public void init_nullOutputField_shouldThrowExceptionWithDetailedMessage() { args.add("prompt", "Summarize: {string_field}"); args.add("model", "model1"); - DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); assertEquals("Exactly one 'outputField' must be provided", e.getMessage()); @@ -126,9 +131,12 @@ public void init_moreThanOneOutputField_shouldThrowExceptionWithDetailedMessage( args.add("prompt", "Summarize: {string_field}"); args.add("model", "model1"); - DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + DocumentEnrichmentUpdateProcessorFactory factory = + new 
DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); - assertEquals("Only one 'outputField' can be provided, but found: [enriched_field, body_field]", e.getMessage()); + assertEquals( + "Only one 'outputField' can be provided, but found: [enriched_field, body_field]", + e.getMessage()); } @Test @@ -138,7 +146,8 @@ public void init_neitherPromptNorPromptFile_shouldThrowExceptionWithDetailedMess args.add("outputField", "enriched_field"); args.add("model", "model1"); - DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); assertEquals("Either 'prompt' or 'promptFile' must be provided", e.getMessage()); @@ -153,28 +162,33 @@ public void init_bothPromptAndPromptFile_shouldThrowExceptionWithDetailedMessage args.add("promptFile", "prompt.txt"); args.add("model", "model1"); - DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); assertEquals("Only one of 'prompt' or 'promptFile' can be provided, not both", e.getMessage()); } @Test - public void init_promptMissingPlaceholderForDeclaredField_shouldThrowExceptionWithDetailedMessage() { + public void + init_promptMissingPlaceholderForDeclaredField_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "enriched_field"); args.add("prompt", "Summarize:"); args.add("model", "model1"); - DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + DocumentEnrichmentUpdateProcessorFactory 
factory = + new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); - assertEquals("prompt is missing placeholders for inputField(s): [string_field]", e.getMessage()); + assertEquals( + "prompt is missing placeholders for inputField(s): [string_field]", e.getMessage()); } @Test - public void init_promptMissingOnePlaceholderOfMultipleFields_shouldThrowExceptionWithDetailedMessage() { + public void + init_promptMissingOnePlaceholderOfMultipleFields_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("inputField", "body_field"); @@ -182,21 +196,24 @@ public void init_promptMissingOnePlaceholderOfMultipleFields_shouldThrowExceptio args.add("prompt", "Title: {string_field}."); args.add("model", "model1"); - DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); assertEquals("prompt is missing placeholders for inputField(s): [body_field]", e.getMessage()); } @Test - public void init_promptHasExtraPlaceholderNotDeclaredAsInputField_shouldThrowExceptionWithDetailedMessage() { + public void + init_promptHasExtraPlaceholderNotDeclaredAsInputField_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "enriched_field"); args.add("prompt", "Title: {string_field}. 
Extra: {unknown_field}."); args.add("model", "model1"); - DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); assertEquals( @@ -211,7 +228,8 @@ public void init_nullModel_shouldThrowExceptionWithDetailedMessage() { args.add("outputField", "enriched_field"); args.add("prompt", "Summarize: {string_field}"); - DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); assertEquals("Missing required parameter: model", e.getMessage()); @@ -219,7 +237,9 @@ public void init_nullModel_shouldThrowExceptionWithDetailedMessage() { @Test public void init_promptFile_shouldLoadPromptFromFile() { - DocumentEnrichmentUpdateProcessorFactory factory = initializeUpdateProcessorFactory(List.of("string_field"), "enriched_field", "prompt.txt", "model1"); + DocumentEnrichmentUpdateProcessorFactory factory = + initializeUpdateProcessorFactory( + List.of("string_field"), "enriched_field", "prompt.txt", "model1"); factory.inform(collection1); assertEquals("prompt.txt", factory.getPromptFile()); @@ -229,7 +249,12 @@ public void init_promptFile_shouldLoadPromptFromFile() { @Test public void init_promptFileMultiField_shouldLoadAndValidateBothPlaceholders() { - DocumentEnrichmentUpdateProcessorFactory factory = initializeUpdateProcessorFactory(List.of("string_field", "body_field"), "enriched_field", "prompt-multi-field.txt", "model1"); + DocumentEnrichmentUpdateProcessorFactory factory = + initializeUpdateProcessorFactory( + List.of("string_field", "body_field"), + "enriched_field", + "prompt-multi-field.txt", + "model1"); factory.inform(collection1); 
assertNotNull(factory.getPrompt()); @@ -239,7 +264,9 @@ public void init_promptFileMultiField_shouldLoadAndValidateBothPlaceholders() { @Test public void init_promptFileWithMissingPlaceholder_shouldThrowExceptionInInform() { - DocumentEnrichmentUpdateProcessorFactory factory = initializeUpdateProcessorFactory(List.of("string_field"), "enriched_field", "prompt-no-placeholder.txt", "model1"); + DocumentEnrichmentUpdateProcessorFactory factory = + initializeUpdateProcessorFactory( + List.of("string_field"), "enriched_field", "prompt-no-placeholder.txt", "model1"); SolrException e = assertThrows(SolrException.class, () -> factory.inform(collection1)); assertEquals( @@ -249,40 +276,73 @@ public void init_promptFileWithMissingPlaceholder_shouldThrowExceptionInInform() /* Following tests depend on a real solr schema and depend on BeforeClass-AfterClass methods */ @Test - public void init_notExistentOutputField_shouldThrowExceptionWithDetailedMessage() throws Exception { - SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field"),"notExistentOutput", null, collection1, "model1")); + public void init_notExistentOutputField_shouldThrowExceptionWithDetailedMessage() + throws Exception { + SolrException e = + assertThrows( + SolrException.class, + () -> + createUpdateProcessor( + List.of("string_field"), "notExistentOutput", null, collection1, "model1")); assertEquals("undefined field: \"notExistentOutput\"", e.getMessage()); restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1"); } @Test - public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage() throws Exception{ + public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage() + throws Exception { // vector is a DenseVectorField and it's not supported - SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field"), "vector", null, collection1, "model1")); + SolrException e = + 
assertThrows( + SolrException.class, + () -> + createUpdateProcessor( + List.of("string_field"), "vector", null, collection1, "model1")); assertEquals( "field type is not supported by Document Enrichment: DenseVectorField", e.getMessage()); restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1"); } @Test - public void init_unsupportedOutputFieldType_shouldThrowExceptionWithDetailedMessage() throws Exception { + public void init_unsupportedOutputFieldType_shouldThrowExceptionWithDetailedMessage() + throws Exception { // output_binary is a BinaryField, which is not supported (and is not DenseVectorField) - SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field"), "output_binary", null, collection1, "model1")); - assertEquals( - "field type is not supported by Document Enrichment: BinaryField", e.getMessage()); + SolrException e = + assertThrows( + SolrException.class, + () -> + createUpdateProcessor( + List.of("string_field"), "output_binary", null, collection1, "model1")); + assertEquals("field type is not supported by Document Enrichment: BinaryField", e.getMessage()); restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1"); } @Test - public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() throws Exception { - SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("notExistentInput"), "enriched_field", null, collection1, "model1")); + public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() + throws Exception { + SolrException e = + assertThrows( + SolrException.class, + () -> + createUpdateProcessor( + List.of("notExistentInput"), "enriched_field", null, collection1, "model1")); assertEquals("undefined field: \"notExistentInput\"", e.getMessage()); restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1"); } @Test - public void 
init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDetailedMessage() throws Exception { - SolrException e = assertThrows(SolrException.class, () -> createUpdateProcessor(List.of("string_field", "notExistentInput"), "enriched_field_multi", null, collection1, "model1")); + public void init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDetailedMessage() + throws Exception { + SolrException e = + assertThrows( + SolrException.class, + () -> + createUpdateProcessor( + List.of("string_field", "notExistentInput"), + "enriched_field_multi", + null, + collection1, + "model1")); assertEquals("undefined field: \"notExistentInput\"", e.getMessage()); restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1"); } @@ -290,7 +350,8 @@ public void init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDeta @Test public void init_multivaluedStringOutputField_shouldNotThrowException() throws Exception { UpdateRequestProcessor instance = - createUpdateProcessor(List.of("string_field"), "enriched_field_multi", null, collection1, "model1"); + createUpdateProcessor( + List.of("string_field"), "enriched_field_multi", null, collection1, "model1"); assertNotNull(instance); restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1"); } @@ -299,25 +360,28 @@ public void init_multivaluedStringOutputField_shouldNotThrowException() throws E @Test public void buildResponseFormat_unsupportedFieldTypes_shouldThrowUnsupportedFieldTypeException() { - var cases = Map.of( - "output_collation", "CollationField", - "output_date_range", "DateRangeField", - "output_enum", "EnumFieldType", - "output_lat_lon", "LatLonPointSpatialField", - "output_random_sort", "RandomSortField", - "output_rank", "RankField", - "output_uuid", "UUIDField", - "output_nest_path", "NestPathField" - ); + var cases = + Map.of( + "output_collation", "CollationField", + "output_date_range", "DateRangeField", + "output_enum", "EnumFieldType", + "output_lat_lon", 
"LatLonPointSpatialField", + "output_random_sort", "RandomSortField", + "output_rank", "RankField", + "output_uuid", "UUIDField", + "output_nest_path", "NestPathField"); var schema = collection1.getLatestSchema(); - cases.forEach((fieldName, expectedTypeName) -> { - var schemaField = schema.getField(fieldName); - SolrException e = assertThrows(SolrException.class, - () -> DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(schemaField)); - assertEquals( - "field type is not supported by Document Enrichment: " + expectedTypeName, - e.getMessage()); - }); + cases.forEach( + (fieldName, expectedTypeName) -> { + var schemaField = schema.getField(fieldName); + SolrException e = + assertThrows( + SolrException.class, + () -> DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(schemaField)); + assertEquals( + "field type is not supported by Document Enrichment: " + expectedTypeName, + e.getMessage()); + }); } @Test @@ -325,8 +389,7 @@ public void init_sortableTextOutputField_buildResponseFormat_shouldProduceString var schemaField = collection1.getLatestSchema().getField("output_sortable_text"); var responseFormat = DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(schemaField); assertNotNull(responseFormat); - assertEquals( - dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); + assertEquals(dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); assertNotNull(responseFormat.jsonSchema()); } @@ -338,8 +401,7 @@ public void init_multivaluedStringOutputField_buildResponseFormat_shouldProduceA assertTrue(schemaField.multiValued()); var responseFormat = DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(schemaField); assertNotNull(responseFormat); - assertEquals( - dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); + assertEquals(dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); 
assertNotNull(responseFormat.jsonSchema()); } @@ -350,13 +412,12 @@ public void init_singleValuedStringOutputField_buildResponseFormat_shouldProduce assertFalse(schemaField.multiValued()); var responseFormat = DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(schemaField); assertNotNull(responseFormat); - assertEquals( - dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); + assertEquals(dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); assertNotNull(responseFormat.jsonSchema()); } @Test - public void init_dynamicInputField_shouldNotThrowException() throws Exception{ + public void init_dynamicInputField_shouldNotThrowException() throws Exception { UpdateRequestProcessor instance = createUpdateProcessor(List.of("text_s"), "enriched_field", null, collection1, "model1"); assertNotNull(instance); @@ -364,9 +425,10 @@ public void init_dynamicInputField_shouldNotThrowException() throws Exception{ } @Test - public void init_multipleDynamicInputFields_shouldNotThrowException() throws Exception{ + public void init_multipleDynamicInputFields_shouldNotThrowException() throws Exception { UpdateRequestProcessor instance = - createUpdateProcessor(List.of("text_s", "body_field"), "enriched_field", null, collection1, "model1"); + createUpdateProcessor( + List.of("text_s", "body_field"), "enriched_field", null, collection1, "model1"); assertNotNull(instance); restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1"); } @@ -377,7 +439,7 @@ private UpdateRequestProcessor createUpdateProcessor( String prompt, SolrCore core, String modelName) - throws Exception { + throws Exception { ManagedChatModelStore.getManagedModelStore(core) .addModel(new SolrChatModel(modelName, null, null)); @@ -395,21 +457,21 @@ private DocumentEnrichmentUpdateProcessorFactory initializeUpdateProcessorFactor List inputFieldNames, String outputFieldName, String prompt, String modelName) { NamedList args = new 
NamedList<>(); - for (String fieldName : inputFieldNames) { + for (String fieldName : inputFieldNames) { args.add("inputField", fieldName); } args.add("outputField", outputFieldName); if (prompt != null) { args.add("promptFile", prompt); - } - else { + } else { args.add("prompt", "Summarize: {" + String.join("}. {", inputFieldNames) + "}."); } args.add("model", modelName); - DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); factory.init(args); return factory; } diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java index 561aafebd309..ed6b426834da 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java @@ -67,7 +67,10 @@ private void loadDummyChatModel(String modelId, String response) throws Exceptio model.put("class", "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel"); model.put("name", modelId); model.put("params", Map.of("response", response)); - assertJPut(ManagedChatModelStore.REST_END_POINT, Utils.toJSONString(model), "/responseHeader/status==0"); + assertJPut( + ManagedChatModelStore.REST_END_POINT, + Utils.toJSONString(model), + "/responseHeader/status==0"); loadedModelId = modelId; } @@ -75,8 +78,11 @@ private void loadDummyChatModel(String modelId, String response) throws Exceptio public void processAdd_inputField_shouldEnrichInputField() throws Exception { 
loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); - addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); - addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "documentEnrichment"); assertU(commit()); final SolrQuery query = getEnrichmentQuery("enriched_field"); @@ -115,7 +121,8 @@ public void processAdd_modelNotFound_shouldThrowException() { public void processAdd_emptyInputField_shouldLogAndIndexWithNoEnrichedField() throws Exception { loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain(sdoc("id", "99", "string_field", ""), "documentEnrichment"); - addWithChain(sdoc("id", "98", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain( + sdoc("id", "98", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); assertU(commit()); final SolrQuery query = getEnrichmentQuery("enriched_field"); @@ -132,7 +139,8 @@ public void processAdd_emptyInputField_shouldLogAndIndexWithNoEnrichedField() th @Test public void processAdd_nullInputField_shouldLogAndIndexWithNoEnrichedField() throws Exception { loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); - addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); assertU(adoc("id", "98")); // no string_field assertU(commit()); @@ -150,8 +158,12 @@ public void processAdd_nullInputField_shouldLogAndIndexWithNoEnrichedField() thr @Test public void processAdd_failingEnrichment_shouldLogAndIndexWithNoEnrichedField() throws Exception { 
loadTestChatModel("exception-throwing-chat-model.json", "exception-throwing-chat-model"); - addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "failingDocumentEnrichment"); - addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "failingDocumentEnrichment"); + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), + "failingDocumentEnrichment"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "failingDocumentEnrichment"); assertU(commit()); final SolrQuery query = getEnrichmentQuery("enriched_field"); @@ -201,8 +213,17 @@ public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() th // modified via atomic update, the enriched content is recomputed and replaces the previous // value rather than being appended. loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); - assertU(adoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "enriched_field", "old content")); - addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); + assertU( + adoc( + "id", + "99", + "string_field", + "Vegeta is the saiyan prince.", + "enriched_field", + "old content")); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "documentEnrichment"); assertU(commit()); final SolrQuery query = getEnrichmentQuery("enriched_field"); @@ -217,7 +238,8 @@ public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() th SolrInputDocument atomicDoc = new SolrInputDocument(); atomicDoc.setField("id", "99"); - atomicDoc.setField("string_field", Map.of("set", "Vegeta is the saiyan prince from the Dragon Ball series.")); + atomicDoc.setField( + "string_field", Map.of("set", "Vegeta is the saiyan prince from the Dragon Ball series.")); addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates"); 
assertU(commit()); @@ -241,10 +263,22 @@ public void processAdd_arrInputField_shouldEnrichDocument() throws Exception { DummyChatModel.lastReceivedPrompt = null; addWithChain( - sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", "He is very proud."), + sdoc( + "id", + "99", + "string_field", + "Vegeta is the saiyan prince.", + "body_field", + "He is very proud."), "documentEnrichmentArrInputField"); addWithChain( - sdoc("id", "98", "string_field", "Kakaroth is a saiyan.", "body_field", "He grew up on Earth."), + sdoc( + "id", + "98", + "string_field", + "Kakaroth is a saiyan.", + "body_field", + "He grew up on Earth."), "documentEnrichmentArrInputField"); assertU(commit()); @@ -260,16 +294,29 @@ public void processAdd_arrInputField_shouldEnrichDocument() throws Exception { } @Test - public void processAdd_multipleInputFields_allPresent_shouldEnrichDocumentWithBothFields() throws Exception { + public void processAdd_multipleInputFields_allPresent_shouldEnrichDocumentWithBothFields() + throws Exception { loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); DummyChatModel.lastReceivedPrompt = null; addWithChain( - sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", "He is very proud."), + sdoc( + "id", + "99", + "string_field", + "Vegeta is the saiyan prince.", + "body_field", + "He is very proud."), "documentEnrichmentMultiField"); addWithChain( - sdoc("id", "98", "string_field", "Kakaroth is a saiyan.", "body_field", "He grew up on Earth."), + sdoc( + "id", + "98", + "string_field", + "Kakaroth is a saiyan.", + "body_field", + "He grew up on Earth."), "documentEnrichmentMultiField"); assertU(commit()); @@ -290,7 +337,8 @@ public void processAdd_multipleInputFields_allPresent_shouldEnrichDocumentWithBo } @Test - public void processAdd_multipleInputFields_firstFieldNull_shouldSkipEnrichment() throws Exception { + public void processAdd_multipleInputFields_firstFieldNull_shouldSkipEnrichment() + throws 
Exception { loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain( @@ -313,7 +361,8 @@ public void processAdd_multipleInputFields_firstFieldNull_shouldSkipEnrichment() } @Test - public void processAdd_multipleInputFields_secondFieldEmpty_shouldSkipEnrichment() throws Exception { + public void processAdd_multipleInputFields_secondFieldEmpty_shouldSkipEnrichment() + throws Exception { loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain( @@ -336,7 +385,8 @@ public void processAdd_multipleInputFields_secondFieldEmpty_shouldSkipEnrichment } @Test - public void processAdd_multipleInputFields_bothFieldsAbsent_shouldSkipEnrichment() throws Exception { + public void processAdd_multipleInputFields_bothFieldsAbsent_shouldSkipEnrichment() + throws Exception { loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain(sdoc("id", "99"), "documentEnrichmentMultiField"); @@ -355,14 +405,27 @@ public void processAdd_multipleInputFields_bothFieldsAbsent_shouldSkipEnrichment } @Test - public void processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichment() throws Exception { + public void processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichment() + throws Exception { loadTestChatModel("exception-throwing-chat-model.json", "exception-throwing-chat-model"); addWithChain( - sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", "He is very proud."), + sdoc( + "id", + "99", + "string_field", + "Vegeta is the saiyan prince.", + "body_field", + "He is very proud."), "failingDocumentEnrichmentMultiField"); addWithChain( - sdoc("id", "98", "string_field", "Kakaroth is a saiyan.", "body_field", "He grew up on Earth."), + sdoc( + "id", + "98", + "string_field", + "Kakaroth is a saiyan.", + "body_field", + "He grew up on Earth."), "failingDocumentEnrichmentMultiField"); assertU(commit()); @@ -378,7 +441,8 @@ public void processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichme } 
@Test - public void processAdd_multivaluedInputField_shouldInterpolateCollectionAndEnrichDocument() throws Exception { + public void processAdd_multivaluedInputField_shouldInterpolateCollectionAndEnrichDocument() + throws Exception { // When an input field is multivalued, SolrInputField.getValue() returns the Collection, // whose toString() is used for prompt interpolation (e.g. "[tag1, tag2, tag3]"). // Enrichment must proceed — the collection is non-null and non-empty. @@ -406,8 +470,7 @@ public void processAdd_multivaluedInputField_shouldInterpolateCollectionAndEnric "/response/docs/[1]/id=='98'", "/response/docs/[1]/enriched_field=='enriched content'"); - assertEquals( - "Classify these tags: [tag1, tag2, tag3]", DummyChatModel.lastReceivedPrompt); + assertEquals("Classify these tags: [tag1, tag2, tag3]", DummyChatModel.lastReceivedPrompt); } @Test @@ -434,15 +497,42 @@ public void processAdd_multivaluedStringOutputField_emptyInput_shouldSkipEnrichm @Test public void processAdd_singleTypedOutputField_shouldPopulateValue() throws Exception { - record TypedCase(String modelId, String response, String chain, String field, String expectedValue) {} - List typedCases = List.of( - new TypedCase("dummy-long", "{\"value\": 3000000000}", "documentEnrichmentSingleLong", "output_long", "3000000000"), - new TypedCase("dummy-int", "{\"value\": 7}", "documentEnrichmentSingleInt", "output_int", "7"), - new TypedCase("dummy-float", "{\"value\": 1.5}", "documentEnrichmentSingleFloat", "output_float", "1.5"), - new TypedCase("dummy-double", "{\"value\": 1e308}", "documentEnrichmentSingleDouble", "output_double", "1e308"), - new TypedCase("dummy-boolean", "{\"value\": true}", "documentEnrichmentSingleBoolean", "output_boolean", "true"), - new TypedCase("dummy-date", "{\"value\": \"2024-01-15T00:00:00Z\"}", "documentEnrichmentSingleDate", "output_date", "'2024-01-15T00:00:00Z'") - ); + record TypedCase( + String modelId, String response, String chain, String field, String 
expectedValue) {} + List typedCases = + List.of( + new TypedCase( + "dummy-long", + "{\"value\": 3000000000}", + "documentEnrichmentSingleLong", + "output_long", + "3000000000"), + new TypedCase( + "dummy-int", "{\"value\": 7}", "documentEnrichmentSingleInt", "output_int", "7"), + new TypedCase( + "dummy-float", + "{\"value\": 1.5}", + "documentEnrichmentSingleFloat", + "output_float", + "1.5"), + new TypedCase( + "dummy-double", + "{\"value\": 1e308}", + "documentEnrichmentSingleDouble", + "output_double", + "1e308"), + new TypedCase( + "dummy-boolean", + "{\"value\": true}", + "documentEnrichmentSingleBoolean", + "output_boolean", + "true"), + new TypedCase( + "dummy-date", + "{\"value\": \"2024-01-15T00:00:00Z\"}", + "documentEnrichmentSingleDate", + "output_date", + "'2024-01-15T00:00:00Z'")); for (TypedCase typedCase : typedCases) { loadDummyChatModel(typedCase.modelId(), typedCase.response()); @@ -468,23 +558,52 @@ record TypedCase(String modelId, String response, String chain, String field, St @Test public void processAdd_multivaluedTypedOutputField_shouldPopulateAllValues() throws Exception { - record TypeCaseMulti(String modelId, String response, String chain, String field, List expectedValues) {} - List typedCaseMultis = List.of( - new TypeCaseMulti("dummy-chat-multivalued-1", "{\"value\": [\"tag1\", \"tag2\"]}", - "documentEnrichmentMultivaluedString", "enriched_field_multi", List.of("'tag1'", "'tag2'")), - new TypeCaseMulti("dummy-long-multi", "{\"value\": [1000000000, 2000000000, 3000000000]}", - "documentEnrichmentMultivaluedLong", "output_long_multi", List.of("1000000000", "2000000000", "3000000000")), - new TypeCaseMulti("dummy-int-multi", "{\"value\": [1, 2]}", - "documentEnrichmentMultivaluedInt", "output_int_multi", List.of("1", "2")), - new TypeCaseMulti("dummy-float-multi", "{\"value\": [1.5, 2.5]}", - "documentEnrichmentMultivaluedFloat", "output_float_multi", List.of("1.5", "2.5")), - new TypeCaseMulti("dummy-double-multi","{\"value\": 
[1e308, 1.1e308]}", - "documentEnrichmentMultivaluedDouble", "output_double_multi", List.of("1e308", "1.1e308")), - new TypeCaseMulti("dummy-boolean-multi", "{\"value\": [true, false]}", - "documentEnrichmentMultivaluedBoolean", "output_boolean_multi", List.of("true", "false")), - new TypeCaseMulti("dummy-date-multi", "{\"value\": [\"2024-01-15T00:00:00Z\", \"2025-06-30T00:00:00Z\"]}", - "documentEnrichmentMultivaluedDate", "output_date_multi", List.of("'2024-01-15T00:00:00Z'", "'2025-06-30T00:00:00Z'")) - ); + record TypeCaseMulti( + String modelId, String response, String chain, String field, List expectedValues) {} + List typedCaseMultis = + List.of( + new TypeCaseMulti( + "dummy-chat-multivalued-1", + "{\"value\": [\"tag1\", \"tag2\"]}", + "documentEnrichmentMultivaluedString", + "enriched_field_multi", + List.of("'tag1'", "'tag2'")), + new TypeCaseMulti( + "dummy-long-multi", + "{\"value\": [1000000000, 2000000000, 3000000000]}", + "documentEnrichmentMultivaluedLong", + "output_long_multi", + List.of("1000000000", "2000000000", "3000000000")), + new TypeCaseMulti( + "dummy-int-multi", + "{\"value\": [1, 2]}", + "documentEnrichmentMultivaluedInt", + "output_int_multi", + List.of("1", "2")), + new TypeCaseMulti( + "dummy-float-multi", + "{\"value\": [1.5, 2.5]}", + "documentEnrichmentMultivaluedFloat", + "output_float_multi", + List.of("1.5", "2.5")), + new TypeCaseMulti( + "dummy-double-multi", + "{\"value\": [1e308, 1.1e308]}", + "documentEnrichmentMultivaluedDouble", + "output_double_multi", + List.of("1e308", "1.1e308")), + new TypeCaseMulti( + "dummy-boolean-multi", + "{\"value\": [true, false]}", + "documentEnrichmentMultivaluedBoolean", + "output_boolean_multi", + List.of("true", "false")), + new TypeCaseMulti( + "dummy-date-multi", + "{\"value\": [\"2024-01-15T00:00:00Z\", \"2025-06-30T00:00:00Z\"]}", + "documentEnrichmentMultivaluedDate", + "output_date_multi", + List.of("'2024-01-15T00:00:00Z'", "'2025-06-30T00:00:00Z'"))); for (TypeCaseMulti typedCase 
: typedCaseMultis) { loadDummyChatModel(typedCase.modelId(), typedCase.response()); @@ -499,7 +618,15 @@ record TypeCaseMulti(String modelId, String response, String chain, String field String docId = docIdx == 0 ? "'99'" : "'98'"; assertions.add("/response/docs/[" + docIdx + "]/id==" + docId); for (int i = 0; i < typedCase.expectedValues().size(); i++) { - assertions.add("/response/docs/[" + docIdx + "]/" + typedCase.field() + "/[" + i + "]==" + typedCase.expectedValues().get(i)); + assertions.add( + "/response/docs/[" + + docIdx + + "]/" + + typedCase.field() + + "/[" + + i + + "]==" + + typedCase.expectedValues().get(i)); } } assertJQ("/query" + query.toQueryString(), assertions.toArray(new String[0])); @@ -517,8 +644,11 @@ public void processAdd_llmResponseMissingValueKey_shouldLogAndIndexWithNoEnriche // Model returns valid JSON but without the required "value" key loadDummyChatModel("dummy-chat-1", "{\"result\": \"some value\"}"); - addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); - addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "documentEnrichment"); assertU(commit()); final SolrQuery query = getEnrichmentQuery("enriched_field"); @@ -538,8 +668,11 @@ public void processAdd_llmResponseMalformedJson_shouldLogAndIndexWithNoEnrichedF // Model returns a plain string that cannot be parsed as JSON loadDummyChatModel("dummy-chat-1", "not valid json at all"); - addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); - addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); + addWithChain( + sdoc("id", "99", "string_field", 
"Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "documentEnrichment"); assertU(commit()); final SolrQuery query = getEnrichmentQuery("enriched_field"); @@ -569,8 +702,7 @@ public void processAdd_dateOutputField_malformedDateString_shouldFailToIndex() t RemoteSolrException.class, () -> addWithChain( - sdoc("id", "99", "string_field", "some content"), - "documentEnrichmentSingleDate")); + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleDate")); } @Test @@ -599,8 +731,10 @@ public void processAdd_intOutputField_decimalResponse_shouldTruncateAndIndex() t public void processAdd_doubleOutputField_intResponse_shouldConvertAndIndex() throws Exception { loadDummyChatModel("dummy-double", "{\"value\": 3}"); - addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleDouble"); - addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleDouble"); + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleDouble"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleDouble"); assertU(commit()); final SolrQuery query = getEnrichmentQuery("output_double"); @@ -622,7 +756,8 @@ public void processAdd_floatOutputField_doubleResponse_shouldRoundToFloatPrecisi loadDummyChatModel("dummy-float", "{\"value\": 3.141592653589793}"); addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleFloat"); - addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleFloat"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleFloat"); assertU(commit()); final SolrQuery query = getEnrichmentQuery("output_float"); @@ -641,7 +776,8 @@ public void processAdd_floatOutputField_doubleResponse_shouldRoundToFloatPrecisi public void 
processAdd_multivaluedOutputField_singleValuedLlmResponse_shouldStoreSingleValue() throws Exception { // Model returns {"value": "a single string"} for a multivalued output field. - // The scalar is stored as a single-element multivalued field content (e.g., list with only one element). + // The scalar is stored as a single-element multivalued field content (e.g., list with only one + // element). loadDummyChatModel("dummy-chat-multivalued-1", "{\"value\": \"a single string\"}"); addWithChain( @@ -666,7 +802,7 @@ public void processAdd_multivaluedOutputField_singleValuedLlmResponse_shouldStor private SolrQuery getEnrichmentQuery(String enrichedFieldName) { final SolrQuery query = new SolrQuery(); query.setQuery("*:*"); - query.add("fl", "id,"+enrichedFieldName); + query.add("fl", "id," + enrichedFieldName); query.add("sort", "id desc"); return query; } From af48a10877cdfa0ffab8f60297b284d7931482b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Fri, 17 Apr 2026 15:06:39 +0200 Subject: [PATCH 16/17] [llm-document-enrichment] Post-check changes. 
Now `./gradlew check` works --- .../store/rest/TestChatModelManagerPersistence.java | 6 ++---- .../pages/update-request-processors.adoc | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java index 329de3fa70b5..1c203478a69e 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java @@ -81,8 +81,7 @@ public void testModelStorePersistence() throws Exception { assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); // check persistence after restart - getJetty().stop(); - getJetty().start(); + restartJetty(); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); @@ -93,8 +92,7 @@ public void testModelStorePersistence() throws Exception { restTestHarness.reload(); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/==[]"); - getJetty().stop(); - getJetty().start(); + restartJetty(); assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/==[]"); } } diff --git a/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc b/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc index a968851e01bc..4083056a4a6d 100644 --- a/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc +++ b/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc @@ -421,7 +421,7 @@ The 
{solr-javadocs}/modules/language-models/index.html[`language-models`] module It uses external text to vectors language models to perform the vectorisation for each processed document. For more information: xref:query-guide:text-to-vector.adoc[Update Request Processor] -{solr-javadocs}/modules/language-models/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentProcessorFactory.html[DocumentEnrichmentProcessorFactory]:: Update processor which, starting from one or more fields in input and a given prompt, adds the output of an LLM as the value of a new field. +{solr-javadocs}/modules/language-models/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.html[DocumentEnrichmentUpdateProcessorFactory]:: Update processor which, starting from one or more fields in input and a given prompt, adds the output of an LLM as the value of a new field. It uses external chat language models to perform the enrichment of each processed document. 
For more information: xref:indexing-guide:document-enrichment-with-llms.adoc[Document Enrichment documentation] From f15285ebe551497889ed96e9693b48954fa54d39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Fri, 17 Apr 2026 18:15:05 +0200 Subject: [PATCH 17/17] [llm-document-enrichment] Fix GitHub actions failures --- ...richment.yml => SOLR-18187-llm-document-enrichment.yml} | 3 +++ .../DocumentEnrichmentUpdateProcessorFactoryTest.java | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) rename changelog/unreleased/{llm-document-enrichment.yml => SOLR-18187-llm-document-enrichment.yml} (72%) diff --git a/changelog/unreleased/llm-document-enrichment.yml b/changelog/unreleased/SOLR-18187-llm-document-enrichment.yml similarity index 72% rename from changelog/unreleased/llm-document-enrichment.yml rename to changelog/unreleased/SOLR-18187-llm-document-enrichment.yml index fd6e55d6249f..e5d43c189b38 100644 --- a/changelog/unreleased/llm-document-enrichment.yml +++ b/changelog/unreleased/SOLR-18187-llm-document-enrichment.yml @@ -2,3 +2,6 @@ title: Add DocumentEnrichmentUpdateProcessorFactory for LLM-based document enric type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other authors: - name: Nicolò Rinaldi +links: + - name: SOLR-18187 + url: https://issues.apache.org/jira/browse/SOLR-18187 diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java index 6b4bf984fa88..c7fcc1c1b5f9 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java +++ 
b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java @@ -28,6 +28,7 @@ import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore; import org.apache.solr.request.SolrQueryRequestBase; import org.apache.solr.update.processor.UpdateRequestProcessor; +import dev.langchain4j.model.chat.request.ResponseFormatType; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ -389,7 +390,7 @@ public void init_sortableTextOutputField_buildResponseFormat_shouldProduceString var schemaField = collection1.getLatestSchema().getField("output_sortable_text"); var responseFormat = DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(schemaField); assertNotNull(responseFormat); - assertEquals(dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); + assertEquals(ResponseFormatType.JSON, responseFormat.type()); assertNotNull(responseFormat.jsonSchema()); } @@ -401,7 +402,7 @@ public void init_multivaluedStringOutputField_buildResponseFormat_shouldProduceA assertTrue(schemaField.multiValued()); var responseFormat = DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(schemaField); assertNotNull(responseFormat); - assertEquals(dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); + assertEquals(ResponseFormatType.JSON, responseFormat.type()); assertNotNull(responseFormat.jsonSchema()); } @@ -412,7 +413,7 @@ public void init_singleValuedStringOutputField_buildResponseFormat_shouldProduce assertFalse(schemaField.multiValued()); var responseFormat = DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(schemaField); assertNotNull(responseFormat); - assertEquals(dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); + assertEquals(ResponseFormatType.JSON, responseFormat.type()); assertNotNull(responseFormat.jsonSchema()); }