From 10034a7b1f2eaf3e507ffb9c079ee471daac2cce Mon Sep 17 00:00:00 2001 From: AntMoraisElastic Date: Thu, 13 Nov 2025 17:39:36 +0000 Subject: [PATCH 01/11] Add gp-llm-v2 model ID and pre-configured inference endpoint --- ...eGetModelsWithElasticInferenceServiceIT.java | 5 +++-- ...sticInferenceServiceAuthorizationServer.java | 4 ++++ .../AuthorizationTaskExecutorIT.java | 14 ++++++++++++-- ...uthorizationTaskExecutorMultipleNodesIT.java | 10 +++++++++- .../elastic/InternalPreconfiguredEndpoints.java | 17 +++++++++++++++++ .../elastic/ElasticInferenceServiceTests.java | 2 +- .../PreconfiguredEndpointModelAdapterTests.java | 17 +++++++++++++++++ 7 files changed, 63 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java index b95de2f72e43e..fd253e6f99ca8 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java @@ -42,14 +42,15 @@ public void testGetDefaultEndpoints() throws IOException { var allModels = getAllModels(); var chatCompletionModels = getModels("_all", TaskType.CHAT_COMPLETION); - assertThat(allModels, hasSize(7)); - assertThat(chatCompletionModels, hasSize(1)); + assertThat(allModels, hasSize(8)); + assertThat(chatCompletionModels, hasSize(2)); for (var model : chatCompletionModels) { assertEquals("chat_completion", model.get("task_type")); } assertInferenceIdTaskType(allModels, ".rainbow-sprinkles-elastic", TaskType.CHAT_COMPLETION); + assertInferenceIdTaskType(allModels, ".gp-llm-v2", TaskType.CHAT_COMPLETION); assertInferenceIdTaskType(allModels, ".elser-2-elastic", TaskType.SPARSE_EMBEDDING); assertInferenceIdTaskType(allModels, ".jina-embeddings-v3", TaskType.TEXT_EMBEDDING); assertInferenceIdTaskType(allModels, ".elastic-rerank-v1", TaskType.RERANK); diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java index dba1ce3c9e5f9..8bb9a7e576baf 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java @@ -38,6 +38,10 @@ public void enqueueAuthorizeAllModelsResponse() { "model_name": "rainbow-sprinkles", "task_types": ["chat"] }, + { + "model_name": "gp-llm-v2", + "task_types": ["chat"] + }, { "model_name": "elser_model_2", "task_types": ["embed/text/sparse"] diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorIT.java index 8450ceab04848..b43d0739c30a5 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorIT.java @@ -54,7 +54,7 @@ public class AuthorizationTaskExecutorIT extends ESSingleNodeTestCase { ] } """; - + // Should we add gp-llm-v2 to the response? public static final String AUTHORIZED_RAINBOW_SPRINKLES_RESPONSE = """ { "models": [ @@ -203,13 +203,18 @@ public void testCreatesEisChatCompletion_DoesNotRemoveEndpointWhenNoLongerAuthor private void assertChatCompletionEndpointExists() { var eisEndpoints = getEisEndpoints(); - assertThat(eisEndpoints.size(), is(1)); + assertThat(eisEndpoints.size(), is(2)); var rainbowSprinklesModel = eisEndpoints.get(0); assertChatCompletionUnparsedModel(rainbowSprinklesModel); assertTrue( modelRegistry.containsPreconfiguredInferenceEndpointId(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1) ); + var gpLlmV2Model = eisEndpoints.get(1); + assertChatCompletionUnparsedModel(gpLlmV2Model); + assertTrue( + modelRegistry.containsPreconfiguredInferenceEndpointId(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2) + ); } private void assertChatCompletionUnparsedModel(UnparsedModel rainbowSprinklesModel) { @@ -217,6 +222,11 @@ private void assertChatCompletionUnparsedModel(UnparsedModel rainbowSprinklesMod assertThat(rainbowSprinklesModel.service(), is(ElasticInferenceService.NAME)); assertThat(rainbowSprinklesModel.inferenceEntityId(), is(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1)); } + private void assertChatCompletionUnparsedModel(UnparsedModel gpLlmV2Model) { + assertThat(gpLlmV2Model.taskType(), is(TaskType.CHAT_COMPLETION)); + assertThat(gpLlmV2Model.service(), is(ElasticInferenceService.NAME)); + assertThat(gpLlmV2Model.inferenceEntityId(), is(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2)); + } public void testCreatesChatCompletion_AndThenCreatesTextEmbedding() throws Exception { assertNoAuthorizedEisEndpoints(); diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorMultipleNodesIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorMultipleNodesIT.java index cb92c70d27442..6f81aebbe8f37 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorMultipleNodesIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorMultipleNodesIT.java @@ -123,7 +123,7 @@ public void testAuthorizationTaskGetsRelocatedToAnotherNode_WhenTheNodeThatIsRun .stream() .filter(endpoint -> endpoint.getService().equals(ElasticInferenceService.NAME)) .toList(); - assertThat(eisEndpoints.size(), is(1)); + assertThat(eisEndpoints.size(), is(2)); var rainbowSprinklesEndpoint = eisEndpoints.get(0); assertThat(rainbowSprinklesEndpoint.getService(), is(ElasticInferenceService.NAME)); @@ -132,6 +132,14 @@ public void testAuthorizationTaskGetsRelocatedToAnotherNode_WhenTheNodeThatIsRun is(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1) ); assertThat(rainbowSprinklesEndpoint.getTaskType(), is(TaskType.CHAT_COMPLETION)); + + var gpLlmV2Endpoint = eisEndpoints.get(1); + assertThat(gpLlmV2Endpoint.getService(), is(ElasticInferenceService.NAME)); + assertThat( + gpLlmV2Endpoint.getInferenceEntityId(), + is(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2) + ); + assertThat(gpLlmV2Endpoint.getTaskType(), is(TaskType.CHAT_COMPLETION)); }); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java index 904a328491f33..735c45584c287 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java @@ -33,6 +33,10 @@ public class InternalPreconfiguredEndpoints { public static final String DEFAULT_CHAT_COMPLETION_MODEL_ID_V1 = "rainbow-sprinkles"; public static final String DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1 = ".rainbow-sprinkles-elastic"; + // gp-llm-v2 + public static final String DEFAULT_CHAT_COMPLETION_MODEL_ID_V2 = "gp-llm-v2"; + public static final String DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2 = ".gp-llm-v2"; + // elser-2 public static final String DEFAULT_ELSER_2_MODEL_ID = "elser_model_2"; public static final String DEFAULT_ELSER_ENDPOINT_ID_V2 = ".elser-2-elastic"; @@ -53,6 +57,8 @@ public record MinimalModel( private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SERVICE_SETTINGS = new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1); + private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SERVICE_SETTINGS_V2 = + new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V2); private static final ElasticInferenceServiceSparseEmbeddingsServiceSettings SPARSE_EMBEDDINGS_SERVICE_SETTINGS = new ElasticInferenceServiceSparseEmbeddingsServiceSettings(DEFAULT_ELSER_2_MODEL_ID, null); private static final ElasticInferenceServiceDenseTextEmbeddingsServiceSettings DENSE_TEXT_EMBEDDINGS_SERVICE_SETTINGS = @@ -80,6 +86,17 @@ public record MinimalModel( COMPLETION_SERVICE_SETTINGS ) ), + DEFAULT_CHAT_COMPLETION_MODEL_ID_V2, + new MinimalModel( + new ModelConfigurations( + DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2, + TaskType.CHAT_COMPLETION, + ElasticInferenceService.NAME, + COMPLETION_SERVICE_SETTINGS_V2, + ChunkingSettingsBuilder.DEFAULT_SETTINGS + ), + COMPLETION_SERVICE_SETTINGS_V2 + ), DEFAULT_ELSER_2_MODEL_ID, List.of( new MinimalModel( diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java index 4b17cab04471a..808b89c9e812d 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java @@ -1064,7 +1064,7 @@ public void testSupportedTaskTypes_Returns_Unsupported() throws Exception { expectThrows(UnsupportedOperationException.class, service::supportedTaskTypes); } } - + // Should we add another test for gp-llm-v2? public void testUnifiedCompletionError() { var e = assertThrows(UnifiedChatCompletionException.class, () -> testUnifiedStream(404, """ { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java index e718c83c3f965..6f6bbcd03a9f0 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java @@ -27,6 +27,8 @@ import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_MODEL_ID_V1; +import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2; +import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_MODEL_ID_V2; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_ELSER_2_MODEL_ID; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_ELSER_ENDPOINT_ID_V2; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_MULTILINGUAL_EMBED_ENDPOINT_ID; @@ -45,6 +47,8 @@ public class PreconfiguredEndpointModelAdapterTests extends ESTestCase { new ElasticInferenceServiceSparseEmbeddingsServiceSettings(DEFAULT_ELSER_2_MODEL_ID, null); private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SETTINGS = new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1); + private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SETTINGS_V2 = + new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V2); private static final ElasticInferenceServiceDenseTextEmbeddingsServiceSettings DENSE_SETTINGS = new ElasticInferenceServiceDenseTextEmbeddingsServiceSettings( DEFAULT_MULTILINGUAL_EMBED_MODEL_ID, @@ -60,6 +64,7 @@ public class PreconfiguredEndpointModelAdapterTests extends ESTestCase { public void testGetModelsWithValidId() { var endpointIds = Set.of( DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1, + DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2, DEFAULT_ELSER_ENDPOINT_ID_V2, DEFAULT_RERANK_ENDPOINT_ID_V1, DEFAULT_MULTILINGUAL_EMBED_ENDPOINT_ID @@ -94,6 +99,18 @@ public void testGetModelsWithValidId() { COMPLETION_SETTINGS, EIS_COMPONENTS ), + new ElasticInferenceServiceModel( + new ModelConfigurations( + DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2, + TaskType.CHAT_COMPLETION, + ElasticInferenceService.NAME, + COMPLETION_SETTINGS_V2, + ChunkingSettingsBuilder.DEFAULT_SETTINGS + ), + new ModelSecrets(EmptySecretSettings.INSTANCE), + COMPLETION_SETTINGS_V2, + EIS_COMPONENTS + ), new ElasticInferenceServiceModel( new ModelConfigurations( DEFAULT_MULTILINGUAL_EMBED_ENDPOINT_ID, From a047a083df8d90c024635c0c9e26cbf8c35ffd3b Mon Sep 17 00:00:00 2001 From: AntMoraisElastic Date: Fri, 14 Nov 2025 11:29:16 +0000 Subject: [PATCH 02/11] Rename inference endpoint ID --- .../InferenceGetModelsWithElasticInferenceServiceIT.java | 2 +- .../services/elastic/InternalPreconfiguredEndpoints.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java index fd253e6f99ca8..b413a38a052e8 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java @@ -50,7 +50,7 @@ public void testGetDefaultEndpoints() throws IOException { } assertInferenceIdTaskType(allModels, ".rainbow-sprinkles-elastic", TaskType.CHAT_COMPLETION); - assertInferenceIdTaskType(allModels, ".gp-llm-v2", TaskType.CHAT_COMPLETION); + assertInferenceIdTaskType(allModels, ".gp-llm-v2-chat_completion", TaskType.CHAT_COMPLETION); assertInferenceIdTaskType(allModels, ".elser-2-elastic", TaskType.SPARSE_EMBEDDING); assertInferenceIdTaskType(allModels, ".jina-embeddings-v3", TaskType.TEXT_EMBEDDING); assertInferenceIdTaskType(allModels, ".elastic-rerank-v1", TaskType.RERANK); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java index 735c45584c287..0e4969a8342ec 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java @@ -35,7 +35,7 @@ public class InternalPreconfiguredEndpoints { // gp-llm-v2 public static final String DEFAULT_CHAT_COMPLETION_MODEL_ID_V2 = "gp-llm-v2"; - public static final String DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2 = ".gp-llm-v2"; + public static final String DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2 = ".gp-llm-v2-chat_completion"; // elser-2 public static final String DEFAULT_ELSER_2_MODEL_ID = "elser_model_2"; From 3a26c8cdfeb2f737c0712995d5a21618782677bc Mon Sep 17 00:00:00 2001 From: AntMoraisElastic Date: Fri, 14 Nov 2025 11:30:44 +0000 Subject: [PATCH 03/11] Autoformat --- .../inference/integration/AuthorizationTaskExecutorIT.java | 1 + .../AuthorizationTaskExecutorMultipleNodesIT.java | 5 +---- .../services/elastic/ElasticInferenceServiceTests.java | 1 + .../PreconfiguredEndpointModelAdapterTests.java | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorIT.java index b43d0739c30a5..0ed61d40e302a 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorIT.java @@ -222,6 +222,7 @@ private void assertChatCompletionUnparsedModel(UnparsedModel rainbowSprinklesMod assertThat(rainbowSprinklesModel.service(), is(ElasticInferenceService.NAME)); assertThat(rainbowSprinklesModel.inferenceEntityId(), is(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1)); } + private void assertChatCompletionUnparsedModel(UnparsedModel gpLlmV2Model) { assertThat(gpLlmV2Model.taskType(), is(TaskType.CHAT_COMPLETION)); assertThat(gpLlmV2Model.service(), is(ElasticInferenceService.NAME)); diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorMultipleNodesIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorMultipleNodesIT.java index 6f81aebbe8f37..90aa0f7e625cb 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorMultipleNodesIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorMultipleNodesIT.java @@ -135,10 +135,7 @@ public void testAuthorizationTaskGetsRelocatedToAnotherNode_WhenTheNodeThatIsRun var gpLlmV2Endpoint = eisEndpoints.get(1); assertThat(gpLlmV2Endpoint.getService(), is(ElasticInferenceService.NAME)); - assertThat( - gpLlmV2Endpoint.getInferenceEntityId(), - is(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2) - ); + assertThat(gpLlmV2Endpoint.getInferenceEntityId(), is(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2)); assertThat(gpLlmV2Endpoint.getTaskType(), is(TaskType.CHAT_COMPLETION)); }); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java index 808b89c9e812d..bfcdc56a183ae 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java @@ -1064,6 +1064,7 @@ public void testSupportedTaskTypes_Returns_Unsupported() throws Exception { expectThrows(UnsupportedOperationException.class, service::supportedTaskTypes); } } + // Should we add another test for gp-llm-v2? public void testUnifiedCompletionError() { var e = assertThrows(UnifiedChatCompletionException.class, () -> testUnifiedStream(404, """ diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java index 6f6bbcd03a9f0..b38e8de39c08d 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java @@ -26,8 +26,8 @@ import java.util.Set; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1; -import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_MODEL_ID_V1; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2; +import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_MODEL_ID_V1; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_MODEL_ID_V2; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_ELSER_2_MODEL_ID; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_ELSER_ENDPOINT_ID_V2; From 5e495e939e6dfe4a1e0257d0e1e9db2c3a4334bc Mon Sep 17 00:00:00 2001 From: AntMoraisElastic Date: Fri, 14 Nov 2025 11:42:20 +0000 Subject: [PATCH 04/11] Rename variables for gp-llm-v2 model ID and inference endpoint ID --- .../integration/AuthorizationTaskExecutorIT.java | 15 ++------------- .../AuthorizationTaskExecutorMultipleNodesIT.java | 7 +------ .../elastic/InternalPreconfiguredEndpoints.java | 10 +++++----- .../elastic/ElasticInferenceServiceTests.java | 2 +- .../PreconfiguredEndpointModelAdapterTests.java | 10 +++++----- 5 files changed, 14 insertions(+), 30 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorIT.java index 0ed61d40e302a..8450ceab04848 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorIT.java @@ -54,7 +54,7 @@ public class AuthorizationTaskExecutorIT extends ESSingleNodeTestCase { ] } """; - // Should we add gp-llm-v2 to the response? + public static final String AUTHORIZED_RAINBOW_SPRINKLES_RESPONSE = """ { "models": [ @@ -203,18 +203,13 @@ public void testCreatesEisChatCompletion_DoesNotRemoveEndpointWhenNoLongerAuthor private void assertChatCompletionEndpointExists() { var eisEndpoints = getEisEndpoints(); - assertThat(eisEndpoints.size(), is(2)); + assertThat(eisEndpoints.size(), is(1)); var rainbowSprinklesModel = eisEndpoints.get(0); assertChatCompletionUnparsedModel(rainbowSprinklesModel); assertTrue( modelRegistry.containsPreconfiguredInferenceEndpointId(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1) ); - var gpLlmV2Model = eisEndpoints.get(1); - assertChatCompletionUnparsedModel(gpLlmV2Model); - assertTrue( - modelRegistry.containsPreconfiguredInferenceEndpointId(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2) - ); } private void assertChatCompletionUnparsedModel(UnparsedModel rainbowSprinklesModel) { @@ -223,12 +218,6 @@ private void assertChatCompletionUnparsedModel(UnparsedModel rainbowSprinklesMod assertThat(rainbowSprinklesModel.inferenceEntityId(), is(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1)); } - private void assertChatCompletionUnparsedModel(UnparsedModel gpLlmV2Model) { - assertThat(gpLlmV2Model.taskType(), is(TaskType.CHAT_COMPLETION)); - assertThat(gpLlmV2Model.service(), is(ElasticInferenceService.NAME)); - assertThat(gpLlmV2Model.inferenceEntityId(), is(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2)); - } - public void testCreatesChatCompletion_AndThenCreatesTextEmbedding() throws Exception { assertNoAuthorizedEisEndpoints(); diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorMultipleNodesIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorMultipleNodesIT.java index 90aa0f7e625cb..cb92c70d27442 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorMultipleNodesIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/AuthorizationTaskExecutorMultipleNodesIT.java @@ -123,7 +123,7 @@ public void testAuthorizationTaskGetsRelocatedToAnotherNode_WhenTheNodeThatIsRun .stream() .filter(endpoint -> endpoint.getService().equals(ElasticInferenceService.NAME)) .toList(); - assertThat(eisEndpoints.size(), is(2)); + assertThat(eisEndpoints.size(), is(1)); var rainbowSprinklesEndpoint = eisEndpoints.get(0); assertThat(rainbowSprinklesEndpoint.getService(), is(ElasticInferenceService.NAME)); @@ -132,11 +132,6 @@ public void testAuthorizationTaskGetsRelocatedToAnotherNode_WhenTheNodeThatIsRun is(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1) ); assertThat(rainbowSprinklesEndpoint.getTaskType(), is(TaskType.CHAT_COMPLETION)); - - var gpLlmV2Endpoint = eisEndpoints.get(1); - assertThat(gpLlmV2Endpoint.getService(), is(ElasticInferenceService.NAME)); - assertThat(gpLlmV2Endpoint.getInferenceEntityId(), is(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2)); - assertThat(gpLlmV2Endpoint.getTaskType(), is(TaskType.CHAT_COMPLETION)); }); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java index 0e4969a8342ec..3328c28eccd73 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java @@ -34,8 +34,8 @@ public class InternalPreconfiguredEndpoints { public static final String DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1 = ".rainbow-sprinkles-elastic"; // gp-llm-v2 - public static final String DEFAULT_CHAT_COMPLETION_MODEL_ID_V2 = "gp-llm-v2"; - public static final String DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2 = ".gp-llm-v2-chat_completion"; + public static final String GP_LLM_V2_MODEL_ID = "gp-llm-v2"; + public static final String GP_LLM_V2_ENDPOINT_ID = ".gp-llm-v2-chat_completion"; // elser-2 public static final String DEFAULT_ELSER_2_MODEL_ID = "elser_model_2"; @@ -58,7 +58,7 @@ public record MinimalModel( private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SERVICE_SETTINGS = new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1); private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SERVICE_SETTINGS_V2 = - new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V2); + new ElasticInferenceServiceCompletionServiceSettings(GP_LLM_V2_MODEL_ID); private static final ElasticInferenceServiceSparseEmbeddingsServiceSettings SPARSE_EMBEDDINGS_SERVICE_SETTINGS = new ElasticInferenceServiceSparseEmbeddingsServiceSettings(DEFAULT_ELSER_2_MODEL_ID, null); private static final ElasticInferenceServiceDenseTextEmbeddingsServiceSettings DENSE_TEXT_EMBEDDINGS_SERVICE_SETTINGS = @@ -86,10 +86,10 @@ public record MinimalModel( COMPLETION_SERVICE_SETTINGS ) ), - DEFAULT_CHAT_COMPLETION_MODEL_ID_V2, + GP_LLM_V2_MODEL_ID, new MinimalModel( new ModelConfigurations( - DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2, + GP_LLM_V2_ENDPOINT_ID, TaskType.CHAT_COMPLETION, ElasticInferenceService.NAME, COMPLETION_SERVICE_SETTINGS_V2, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java index bfcdc56a183ae..89312362599f4 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java @@ -1065,7 +1065,7 @@ public void testSupportedTaskTypes_Returns_Unsupported() throws Exception { } } - // Should we add another test for gp-llm-v2? + public void testUnifiedCompletionError() { var e = assertThrows(UnifiedChatCompletionException.class, () -> testUnifiedStream(404, """ { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java index b38e8de39c08d..b3ccc6ce0a7f4 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java @@ -26,9 +26,9 @@ import java.util.Set; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1; -import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2; +import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.GP_LLM_V2_ENDPOINT_ID; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_MODEL_ID_V1; -import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_MODEL_ID_V2; +import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.GP_LLM_V2_MODEL_ID; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_ELSER_2_MODEL_ID; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_ELSER_ENDPOINT_ID_V2; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_MULTILINGUAL_EMBED_ENDPOINT_ID; @@ -48,7 +48,7 @@ public class PreconfiguredEndpointModelAdapterTests extends ESTestCase { private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SETTINGS = new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1); private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SETTINGS_V2 = - new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V2); + new ElasticInferenceServiceCompletionServiceSettings(GP_LLM_V2_MODEL_ID); private static final ElasticInferenceServiceDenseTextEmbeddingsServiceSettings DENSE_SETTINGS = new ElasticInferenceServiceDenseTextEmbeddingsServiceSettings( DEFAULT_MULTILINGUAL_EMBED_MODEL_ID, @@ -64,7 +64,7 @@ public class PreconfiguredEndpointModelAdapterTests extends ESTestCase { public void testGetModelsWithValidId() { var endpointIds = Set.of( DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1, - DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2, + GP_LLM_V2_ENDPOINT_ID, DEFAULT_ELSER_ENDPOINT_ID_V2, DEFAULT_RERANK_ENDPOINT_ID_V1, DEFAULT_MULTILINGUAL_EMBED_ENDPOINT_ID @@ -101,7 +101,7 @@ public void testGetModelsWithValidId() { ), new ElasticInferenceServiceModel( new ModelConfigurations( - DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V2, + GP_LLM_V2_ENDPOINT_ID, TaskType.CHAT_COMPLETION, ElasticInferenceService.NAME, COMPLETION_SETTINGS_V2, From ca5e07bf6a384e5871b99ed431b2e4af1abb094f Mon Sep 17 00:00:00 2001 From: AntMoraisElastic Date: Fri, 14 Nov 2025 11:43:55 +0000 Subject: [PATCH 05/11] Autoformat --- .../services/elastic/ElasticInferenceServiceTests.java | 1 - .../authorization/PreconfiguredEndpointModelAdapterTests.java | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java index 89312362599f4..4b17cab04471a 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java @@ -1065,7 +1065,6 @@ public void testSupportedTaskTypes_Returns_Unsupported() throws Exception { } } - public void testUnifiedCompletionError() { var e = assertThrows(UnifiedChatCompletionException.class, () -> testUnifiedStream(404, """ { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java index b3ccc6ce0a7f4..6b9449f6e1f98 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java @@ -26,9 +26,7 @@ import java.util.Set; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1; -import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.GP_LLM_V2_ENDPOINT_ID; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_MODEL_ID_V1; -import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.GP_LLM_V2_MODEL_ID; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_ELSER_2_MODEL_ID; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_ELSER_ENDPOINT_ID_V2; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_MULTILINGUAL_EMBED_ENDPOINT_ID; @@ -36,6 +34,8 @@ import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_RERANK_ENDPOINT_ID_V1; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_RERANK_MODEL_ID_V1; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DENSE_TEXT_EMBEDDINGS_DIMENSIONS; +import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.GP_LLM_V2_ENDPOINT_ID; +import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.GP_LLM_V2_MODEL_ID; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.defaultDenseTextEmbeddingsSimilarity; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.hasSize; From fe5b0cada7e49af0b8f9e53e37299a41c586219f Mon Sep 17 00:00:00 2001 From: AntMoraisElastic Date: Fri, 14 Nov 2025 11:50:01 +0000 Subject: [PATCH 06/11] Remove new model from Mock auth server --- .../MockElasticInferenceServiceAuthorizationServer.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java index 8bb9a7e576baf..dba1ce3c9e5f9 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java @@ -38,10 +38,6 @@ public void enqueueAuthorizeAllModelsResponse() { "model_name": "rainbow-sprinkles", "task_types": ["chat"] }, - { - "model_name": "gp-llm-v2", - "task_types": ["chat"] - }, { "model_name": "elser_model_2", "task_types": ["embed/text/sparse"] From caf273a1dcd73a44079c4f48eca159cff3449710 Mon Sep 17 00:00:00 2001 From: AntMoraisElastic Date: Fri, 14 Nov 2025 11:50:14 +0000 Subject: [PATCH 07/11] Rename completion service settings for new model --- .../services/elastic/InternalPreconfiguredEndpoints.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java index 3328c28eccd73..97bbaa581a3d7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java @@ -57,7 +57,7 @@ public record MinimalModel( private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SERVICE_SETTINGS = new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1); - private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SERVICE_SETTINGS_V2 = + private static final ElasticInferenceServiceCompletionServiceSettings GP_LLM_V2_COMPLETION_SERVICE_SETTINGS = new ElasticInferenceServiceCompletionServiceSettings(GP_LLM_V2_MODEL_ID); private static final ElasticInferenceServiceSparseEmbeddingsServiceSettings SPARSE_EMBEDDINGS_SERVICE_SETTINGS = new ElasticInferenceServiceSparseEmbeddingsServiceSettings(DEFAULT_ELSER_2_MODEL_ID, null); @@ -92,10 +92,10 @@ public record MinimalModel( GP_LLM_V2_ENDPOINT_ID, TaskType.CHAT_COMPLETION, ElasticInferenceService.NAME, - COMPLETION_SERVICE_SETTINGS_V2, + GP_LLM_V2_COMPLETION_SERVICE_SETTINGS, ChunkingSettingsBuilder.DEFAULT_SETTINGS ), - COMPLETION_SERVICE_SETTINGS_V2 + GP_LLM_V2_COMPLETION_SERVICE_SETTINGS ), DEFAULT_ELSER_2_MODEL_ID, List.of( From 0d47e61ad0fa56f66f9bf719af96a30f8a9dcf92 Mon Sep 17 00:00:00 2001 From: AntMoraisElastic Date: Fri, 14 Nov 2025 11:54:39 +0000 Subject: [PATCH 08/11] Rename completion settings --- .../PreconfiguredEndpointModelAdapterTests.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java index 6b9449f6e1f98..003f70483daad 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java @@ -47,7 +47,7 @@ public class PreconfiguredEndpointModelAdapterTests extends ESTestCase { new ElasticInferenceServiceSparseEmbeddingsServiceSettings(DEFAULT_ELSER_2_MODEL_ID, null); private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SETTINGS = new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1); - private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SETTINGS_V2 = + private static final ElasticInferenceServiceCompletionServiceSettings GP_LLM_V2_COMPLETION_SETTINGS = new ElasticInferenceServiceCompletionServiceSettings(GP_LLM_V2_MODEL_ID); private static final ElasticInferenceServiceDenseTextEmbeddingsServiceSettings DENSE_SETTINGS = new ElasticInferenceServiceDenseTextEmbeddingsServiceSettings( @@ -104,11 +104,11 @@ public void testGetModelsWithValidId() { GP_LLM_V2_ENDPOINT_ID, TaskType.CHAT_COMPLETION, ElasticInferenceService.NAME, - COMPLETION_SETTINGS_V2, + GP_LLM_V2_COMPLETION_SETTINGS, ChunkingSettingsBuilder.DEFAULT_SETTINGS ), new ModelSecrets(EmptySecretSettings.INSTANCE), - COMPLETION_SETTINGS_V2, + GP_LLM_V2_COMPLETION_SETTINGS, EIS_COMPONENTS ), new ElasticInferenceServiceModel( From 56cf6ec506c3291001b72ea832a4aa656e2b84b8 Mon Sep 17 00:00:00 2001 From: AntMoraisElastic Date: Fri, 14 Nov 2025 14:13:15 +0000 Subject: [PATCH 09/11] Add gp-llm-v2 to mock response --- .../MockElasticInferenceServiceAuthorizationServer.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java index dba1ce3c9e5f9..8bb9a7e576baf 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java @@ -38,6 +38,10 @@ public void enqueueAuthorizeAllModelsResponse() { "model_name": "rainbow-sprinkles", "task_types": ["chat"] }, + { + "model_name": "gp-llm-v2", + "task_types": ["chat"] + }, { "model_name": "elser_model_2", "task_types": ["embed/text/sparse"] From bbac2b3da45402e591e4beace3f541ca059fbd14 Mon Sep 17 00:00:00 2001 From: AntMoraisElastic Date: Fri, 14 Nov 2025 14:13:40 +0000 Subject: [PATCH 10/11] Rename gp-llm-v2 inference endpoint ID --- .../services/elastic/InternalPreconfiguredEndpoints.java | 4 ++-- .../PreconfiguredEndpointModelAdapterTests.java | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java index 97bbaa581a3d7..a8de0ad47bf5c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java @@ -35,7 +35,7 @@ public class InternalPreconfiguredEndpoints { // gp-llm-v2 public static final String GP_LLM_V2_MODEL_ID = "gp-llm-v2"; - public static final String GP_LLM_V2_ENDPOINT_ID = ".gp-llm-v2-chat_completion"; + public static final String GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID = ".gp-llm-v2-chat_completion"; // elser-2 public static final String DEFAULT_ELSER_2_MODEL_ID = "elser_model_2"; @@ -89,7 +89,7 @@ public record MinimalModel( GP_LLM_V2_MODEL_ID, new MinimalModel( new ModelConfigurations( - GP_LLM_V2_ENDPOINT_ID, + GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID, TaskType.CHAT_COMPLETION, ElasticInferenceService.NAME, GP_LLM_V2_COMPLETION_SERVICE_SETTINGS, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java index 003f70483daad..a3fc723309a9f 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java @@ -34,7 +34,7 @@ import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_RERANK_ENDPOINT_ID_V1; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_RERANK_MODEL_ID_V1; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DENSE_TEXT_EMBEDDINGS_DIMENSIONS; -import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.GP_LLM_V2_ENDPOINT_ID; +import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.GP_LLM_V2_MODEL_ID; import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.defaultDenseTextEmbeddingsSimilarity; import static org.hamcrest.Matchers.containsInAnyOrder; @@ -64,7 +64,7 @@ public class PreconfiguredEndpointModelAdapterTests extends ESTestCase { public void testGetModelsWithValidId() { var endpointIds = Set.of( DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1, - GP_LLM_V2_ENDPOINT_ID, + GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID, DEFAULT_ELSER_ENDPOINT_ID_V2, DEFAULT_RERANK_ENDPOINT_ID_V1, DEFAULT_MULTILINGUAL_EMBED_ENDPOINT_ID @@ -101,7 +101,7 @@ public void testGetModelsWithValidId() { ), new ElasticInferenceServiceModel( new ModelConfigurations( - GP_LLM_V2_ENDPOINT_ID, + GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID, TaskType.CHAT_COMPLETION, ElasticInferenceService.NAME, GP_LLM_V2_COMPLETION_SETTINGS, From 9bf693bbe63af3d941d5901e45c05851c66f2c9c Mon Sep 17 00:00:00 2001 From: AntMoraisElastic Date: Fri, 14 Nov 2025 14:24:26 +0000 Subject: [PATCH 11/11] Add support for several inference endpoints for gp-llm-v2 model ID --- .../InternalPreconfiguredEndpoints.java | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java index a8de0ad47bf5c..8b1ee97a2840d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java @@ -87,15 +87,17 @@ public record MinimalModel( ) ), GP_LLM_V2_MODEL_ID, - new MinimalModel( - new ModelConfigurations( - GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID, - TaskType.CHAT_COMPLETION, - ElasticInferenceService.NAME, - GP_LLM_V2_COMPLETION_SERVICE_SETTINGS, - ChunkingSettingsBuilder.DEFAULT_SETTINGS - ), - GP_LLM_V2_COMPLETION_SERVICE_SETTINGS + List.of( + new MinimalModel( + new ModelConfigurations( + GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID, + TaskType.CHAT_COMPLETION, + ElasticInferenceService.NAME, + GP_LLM_V2_COMPLETION_SERVICE_SETTINGS, + ChunkingSettingsBuilder.DEFAULT_SETTINGS + ), + GP_LLM_V2_COMPLETION_SERVICE_SETTINGS + ) ), DEFAULT_ELSER_2_MODEL_ID, List.of(