From d6c37f8ea6939e35d8bc70a906aefb228db82d1a Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Thu, 13 Nov 2025 21:33:32 +0100 Subject: [PATCH 1/3] Use single shard for index data. --- .../xpack/ml/integration/RegressionIT.java | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java index 10fc4721cd4eb..0f0f41c87207c 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java @@ -14,6 +14,8 @@ import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.support.WriteRequest; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Strings; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchHit; @@ -360,6 +362,30 @@ public void testStopAndRestart() throws Exception { public void testTwoJobsWithSameRandomizeSeedUseSameTrainingSet() throws Exception { String sourceIndex = "regression_two_jobs_with_same_randomize_seed_source"; + // Create index with 1 shard to ensure deterministic document ordering during reindexing + String mapping = Strings.format(""" + { + "properties": { + "@timestamp": { + "type": "date" + }, + "%s": { + "type": "double" + }, + "%s": { + "type": "unsigned_long" + }, + "%s": { + "type": "double" + } + } + }""", NUMERICAL_FEATURE_FIELD, DISCRETE_NUMERICAL_FEATURE_FIELD, DEPENDENT_VARIABLE_FIELD); + client().admin() + .indices() + 
.prepareCreate(sourceIndex) + .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)) + .setMapping(mapping) + .get(); indexData(sourceIndex, 100, 0); String firstJobId = "regression_two_jobs_with_same_randomize_seed_1"; @@ -908,7 +934,10 @@ static void indexData(String sourceIndex, int numTrainingRows, int numNonTrainin throw new ElasticsearchException(ex); } } else { - client().admin().indices().prepareCreate(sourceIndex).setMapping(mapping).get(); + // Only create index if it doesn't already exist (allows test to create it with custom settings) + if (client().admin().indices().prepareExists(sourceIndex).get().isExists() == false) { + client().admin().indices().prepareCreate(sourceIndex).setMapping(mapping).get(); + } } BulkRequestBuilder bulkRequestBuilder = client().prepareBulk().setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); From 61bacc9a464cbe773cfa34961e05ea13609463d7 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Thu, 13 Nov 2025 21:42:35 +0100 Subject: [PATCH 2/3] Unmute RegressionIT.testTwoJobsWithSameRandomizeSeedUseSameTrainingSet --- muted-tests.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 6c8763c95bcce..37dec1ed0898a 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -58,9 +58,6 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=transform/transforms_reset/Test reset running transform} issue: https://github.com/elastic/elasticsearch/issues/117473 -- class: org.elasticsearch.xpack.ml.integration.RegressionIT - method: testTwoJobsWithSameRandomizeSeedUseSameTrainingSet - issue: https://github.com/elastic/elasticsearch/issues/117805 - class: org.elasticsearch.packaging.test.ArchiveTests method: test44AutoConfigurationNotTriggeredOnNotWriteableConfDir issue: https://github.com/elastic/elasticsearch/issues/118208 From 3276b624372f9a166eecf82b6d2c1d6849ab191a Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy 
<1292899+valeriy42@users.noreply.github.com> Date: Fri, 14 Nov 2025 10:51:11 +0100 Subject: [PATCH 3/3] Refactor RegressionIT to use force merge for deterministic document processing order --- .../xpack/ml/integration/RegressionIT.java | 34 +++---------------- 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java index 0f0f41c87207c..95cdea69362cc 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java @@ -14,8 +14,6 @@ import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.support.WriteRequest; -import org.elasticsearch.cluster.metadata.IndexMetadata; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Strings; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchHit; @@ -362,31 +360,10 @@ public void testStopAndRestart() throws Exception { public void testTwoJobsWithSameRandomizeSeedUseSameTrainingSet() throws Exception { String sourceIndex = "regression_two_jobs_with_same_randomize_seed_source"; - // Create index with 1 shard to ensure deterministic document ordering during reindexing - String mapping = Strings.format(""" - { - "properties": { - "@timestamp": { - "type": "date" - }, - "%s": { - "type": "double" - }, - "%s": { - "type": "unsigned_long" - }, - "%s": { - "type": "double" - } - } - }""", NUMERICAL_FEATURE_FIELD, DISCRETE_NUMERICAL_FEATURE_FIELD, DEPENDENT_VARIABLE_FIELD); - client().admin() - .indices() - .prepareCreate(sourceIndex) - 
.setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)) - .setMapping(mapping) - .get(); indexData(sourceIndex, 100, 0); + // Force merge to single segment to ensure deterministic _doc sort order during reindexing + // Without this, multiple segments or segment merges can cause non-deterministic document processing order + client().admin().indices().prepareForceMerge(sourceIndex).setMaxNumSegments(1).setFlush(true).get(); String firstJobId = "regression_two_jobs_with_same_randomize_seed_1"; String firstJobDestIndex = firstJobId + "_dest"; @@ -934,10 +911,7 @@ static void indexData(String sourceIndex, int numTrainingRows, int numNonTrainin throw new ElasticsearchException(ex); } } else { - // Only create index if it doesn't already exist (allows test to create it with custom settings) - if (client().admin().indices().prepareExists(sourceIndex).get().isExists() == false) { - client().admin().indices().prepareCreate(sourceIndex).setMapping(mapping).get(); - } + client().admin().indices().prepareCreate(sourceIndex).setMapping(mapping).get(); } BulkRequestBuilder bulkRequestBuilder = client().prepareBulk().setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);