Skip to content

Commit 60a99e8

Browse files
authored
Implement coerce for exponential_histogram field type (#137944)
1 parent 2512d07 commit 60a99e8

File tree

5 files changed

+148
-7
lines changed

5 files changed

+148
-7
lines changed

x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramParser.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import java.io.IOException;
1515
import java.util.ArrayList;
1616
import java.util.List;
17+
import java.util.Set;
1718

1819
import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
1920

@@ -22,6 +23,12 @@ public class HistogramParser {
2223
private static final ParseField COUNTS_FIELD = new ParseField("counts");
2324
private static final ParseField VALUES_FIELD = new ParseField("values");
2425

26+
private static final Set<String> ROOT_FIELD_NAMES = Set.of(COUNTS_FIELD.getPreferredName(), VALUES_FIELD.getPreferredName());
27+
28+
public static boolean isHistogramSubFieldName(String subFieldName) {
29+
return ROOT_FIELD_NAMES.contains(subFieldName);
30+
}
31+
2532
/**
2633
* A parsed histogram field, can represent either a T-Digest or a HDR histogram.
2734
* @param values the centroids, guaranteed to be distinct and in increasing order

x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverter.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ public static ExponentialHistogramParser.ParsedExponentialHistogram tDigestToExp
106106
}
107107

108108
private static void appendCentroidWithCountAsBucket(double centroid, long count, int scale, List<IndexWithCount> outputBuckets) {
109+
if (count == 0) {
110+
return; // zero counts are allowed in T-Digests but not in exponential histograms
111+
}
109112
long index = ExponentialScaleUtils.computeIndex(centroid, scale);
110113
assert outputBuckets.isEmpty() || outputBuckets.getLast().index() < index;
111114
outputBuckets.add(new IndexWithCount(index, count));

x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverterTests.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,22 @@ public void testToExponentialHistogramConversionWithCloseCentroids() {
9090
assertThat(posBuckets.get(1).count(), equalTo(2L));
9191
}
9292

93+
public void testToExponentialHistogramConversionWithZeroCounts() {
94+
// build a t-digest with two centroids very close to each other
95+
List<Double> centroids = List.of(1.0, 2.0, 3.0);
96+
List<Long> counts = List.of(1L, 0L, 2L);
97+
98+
HistogramParser.ParsedHistogram input = new HistogramParser.ParsedHistogram(centroids, counts);
99+
ExponentialHistogramParser.ParsedExponentialHistogram converted = ParsedHistogramConverter.tDigestToExponential(input);
100+
101+
assertThat(converted.zeroCount(), equalTo(0L));
102+
List<IndexWithCount> posBuckets = converted.positiveBuckets();
103+
assertThat(posBuckets.size(), equalTo(2));
104+
assertThat(posBuckets.get(0).index(), lessThan(posBuckets.get(1).index()));
105+
assertThat(posBuckets.get(0).count(), equalTo(1L));
106+
assertThat(posBuckets.get(1).count(), equalTo(2L));
107+
}
108+
93109
public void testToTDigestConversionMergesCentroids() {
94110
// build a histogram with two buckets very close to zero
95111
ExponentialHistogram input = ExponentialHistogram.builder(ExponentialHistogram.MAX_SCALE, ExponentialHistogramCircuitBreaker.noop())

x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.lucene.util.NumericUtils;
2121
import org.elasticsearch.common.Explicit;
2222
import org.elasticsearch.common.io.stream.BytesStreamOutput;
23+
import org.elasticsearch.common.settings.Setting;
2324
import org.elasticsearch.common.util.BigArrays;
2425
import org.elasticsearch.core.Nullable;
2526
import org.elasticsearch.core.Releasables;
@@ -41,6 +42,7 @@
4142
import org.elasticsearch.index.mapper.LuceneDocument;
4243
import org.elasticsearch.index.mapper.MappedFieldType;
4344
import org.elasticsearch.index.mapper.MapperBuilderContext;
45+
import org.elasticsearch.index.mapper.NumberFieldMapper;
4446
import org.elasticsearch.index.mapper.SourceLoader;
4547
import org.elasticsearch.index.mapper.SourceValueFetcher;
4648
import org.elasticsearch.index.mapper.ValueFetcher;
@@ -59,7 +61,9 @@
5961
import org.elasticsearch.xcontent.XContentParser;
6062
import org.elasticsearch.xcontent.XContentSubParser;
6163
import org.elasticsearch.xpack.analytics.mapper.ExponentialHistogramParser;
64+
import org.elasticsearch.xpack.analytics.mapper.HistogramParser;
6265
import org.elasticsearch.xpack.analytics.mapper.IndexWithCount;
66+
import org.elasticsearch.xpack.analytics.mapper.ParsedHistogramConverter;
6367
import org.elasticsearch.xpack.exponentialhistogram.fielddata.ExponentialHistogramValuesReader;
6468
import org.elasticsearch.xpack.exponentialhistogram.fielddata.IndexExponentialHistogramFieldData;
6569
import org.elasticsearch.xpack.exponentialhistogram.fielddata.LeafExponentialHistogramFieldData;
@@ -94,6 +98,9 @@ public class ExponentialHistogramFieldMapper extends FieldMapper {
9498

9599
public static final String CONTENT_TYPE = "exponential_histogram";
96100

101+
// use the same default as numbers
102+
private static final Setting<Boolean> COERCE_SETTING = NumberFieldMapper.COERCE_SETTING;
103+
97104
private static ExponentialHistogramFieldMapper toType(FieldMapper in) {
98105
return (ExponentialHistogramFieldMapper) in;
99106
}
@@ -108,7 +115,7 @@ private static ExponentialHistogramFieldMapper toType(FieldMapper in) {
108115
* @param fullPath the full path of the mapped field
109116
* @return the name for the lucene field
110117
*/
111-
private static String zeroThresholdSubFieldName(String fullPath) {
118+
static String zeroThresholdSubFieldName(String fullPath) {
112119
return fullPath + "._zero_threshold";
113120
}
114121

@@ -122,7 +129,7 @@ private static String zeroThresholdSubFieldName(String fullPath) {
122129
* @param fullPath the full path of the mapped field
123130
* @return the name for the lucene field
124131
*/
125-
private static String valuesCountSubFieldName(String fullPath) {
132+
static String valuesCountSubFieldName(String fullPath) {
126133
return fullPath + "._values_count";
127134
}
128135

@@ -142,20 +149,22 @@ static class Builder extends FieldMapper.Builder {
142149

143150
private final FieldMapper.Parameter<Map<String, String>> meta = FieldMapper.Parameter.metaParam();
144151
private final FieldMapper.Parameter<Explicit<Boolean>> ignoreMalformed;
152+
private final Parameter<Explicit<Boolean>> coerce;
145153

146-
Builder(String name, boolean ignoreMalformedByDefault) {
154+
Builder(String name, boolean ignoreMalformedByDefault, boolean coerceByDefault) {
147155
super(name);
148156
this.ignoreMalformed = FieldMapper.Parameter.explicitBoolParam(
149157
"ignore_malformed",
150158
true,
151159
m -> toType(m).ignoreMalformed,
152160
ignoreMalformedByDefault
153161
);
162+
this.coerce = Parameter.explicitBoolParam("coerce", true, m -> toType(m).coerce, coerceByDefault);
154163
}
155164

156165
@Override
157166
protected FieldMapper.Parameter<?>[] getParameters() {
158-
return new FieldMapper.Parameter<?>[] { ignoreMalformed, meta };
167+
return new FieldMapper.Parameter<?>[] { ignoreMalformed, coerce, meta };
159168
}
160169

161170
@Override
@@ -170,13 +179,16 @@ public ExponentialHistogramFieldMapper build(MapperBuilderContext context) {
170179
}
171180

172181
public static final FieldMapper.TypeParser PARSER = new FieldMapper.TypeParser(
173-
(n, c) -> new Builder(n, IGNORE_MALFORMED_SETTING.get(c.getSettings())),
182+
(n, c) -> new Builder(n, IGNORE_MALFORMED_SETTING.get(c.getSettings()), COERCE_SETTING.get(c.getSettings())),
174183
notInMultiFields(CONTENT_TYPE)
175184
);
176185

177186
private final Explicit<Boolean> ignoreMalformed;
178187
private final boolean ignoreMalformedByDefault;
179188

189+
private final Explicit<Boolean> coerce;
190+
private final boolean coerceByDefault;
191+
180192
ExponentialHistogramFieldMapper(
181193
String simpleName,
182194
MappedFieldType mappedFieldType,
@@ -186,21 +198,27 @@ public ExponentialHistogramFieldMapper build(MapperBuilderContext context) {
186198
super(simpleName, mappedFieldType, builderParams);
187199
this.ignoreMalformed = builder.ignoreMalformed.getValue();
188200
this.ignoreMalformedByDefault = builder.ignoreMalformed.getDefaultValue().value();
201+
this.coerce = builder.coerce.getValue();
202+
this.coerceByDefault = builder.coerce.getDefaultValue().value();
189203
}
190204

191205
@Override
192206
public boolean ignoreMalformed() {
193207
return ignoreMalformed.value();
194208
}
195209

210+
boolean coerce() {
211+
return coerce.value();
212+
}
213+
196214
@Override
197215
protected String contentType() {
198216
return CONTENT_TYPE;
199217
}
200218

201219
@Override
202220
public FieldMapper.Builder getMergeBuilder() {
203-
return new Builder(leafName(), ignoreMalformedByDefault).init(this);
221+
return new Builder(leafName(), ignoreMalformedByDefault, coerceByDefault).init(this);
204222
}
205223

206224
@Override
@@ -427,7 +445,15 @@ public void parse(DocumentParserContext context) throws IOException {
427445
subParser = new XContentSubParser(context.parser());
428446
}
429447
subParser.nextToken();
430-
ExponentialHistogramParser.ParsedExponentialHistogram parsedHistogram = ExponentialHistogramParser.parse(fullPath(), subParser);
448+
ExponentialHistogramParser.ParsedExponentialHistogram parsedHistogram;
449+
if (coerce()
450+
&& subParser.currentToken() == XContentParser.Token.FIELD_NAME
451+
&& HistogramParser.isHistogramSubFieldName(subParser.currentName())) {
452+
HistogramParser.ParsedHistogram parsedTDigest = HistogramParser.parse(fullPath(), subParser);
453+
parsedHistogram = ParsedHistogramConverter.tDigestToExponential(parsedTDigest);
454+
} else {
455+
parsedHistogram = ExponentialHistogramParser.parse(fullPath(), subParser);
456+
}
431457

432458
if (context.doc().getByKey(fieldType().name()) != null) {
433459
throw new IllegalArgumentException(

x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,29 @@
77

88
package org.elasticsearch.xpack.exponentialhistogram;
99

10+
import org.apache.lucene.index.IndexableField;
11+
import org.apache.lucene.util.BytesRef;
12+
import org.apache.lucene.util.NumericUtils;
13+
import org.elasticsearch.common.bytes.BytesReference;
1014
import org.elasticsearch.core.Types;
15+
import org.elasticsearch.exponentialhistogram.CompressedExponentialHistogram;
1116
import org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils;
1217
import org.elasticsearch.exponentialhistogram.ZeroBucket;
1318
import org.elasticsearch.index.mapper.DocumentMapper;
1419
import org.elasticsearch.index.mapper.DocumentParsingException;
1520
import org.elasticsearch.index.mapper.MappedFieldType;
1621
import org.elasticsearch.index.mapper.MapperParsingException;
1722
import org.elasticsearch.index.mapper.MapperTestCase;
23+
import org.elasticsearch.index.mapper.ParsedDocument;
1824
import org.elasticsearch.index.mapper.SourceToParse;
1925
import org.elasticsearch.plugins.Plugin;
2026
import org.elasticsearch.xcontent.XContentBuilder;
27+
import org.elasticsearch.xcontent.XContentFactory;
28+
import org.elasticsearch.xcontent.XContentType;
2129
import org.elasticsearch.xpack.analytics.mapper.ExponentialHistogramParser;
30+
import org.elasticsearch.xpack.analytics.mapper.HistogramParser;
2231
import org.elasticsearch.xpack.analytics.mapper.IndexWithCount;
32+
import org.elasticsearch.xpack.analytics.mapper.ParsedHistogramConverter;
2333
import org.junit.AssumptionViolatedException;
2434
import org.junit.Before;
2535

@@ -35,12 +45,14 @@
3545
import java.util.Map;
3646
import java.util.OptionalDouble;
3747
import java.util.Set;
48+
import java.util.stream.IntStream;
3849

3950
import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
4051
import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE;
4152
import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
4253
import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE;
4354
import static org.hamcrest.Matchers.containsString;
55+
import static org.hamcrest.Matchers.equalTo;
4456

4557
public class ExponentialHistogramFieldMapperTests extends MapperTestCase {
4658

@@ -98,6 +110,83 @@ protected boolean supportsIgnoreMalformed() {
98110
@Override
99111
protected void registerParameters(ParameterChecker checker) throws IOException {
100112
checker.registerUpdateCheck(b -> b.field("ignore_malformed", true), m -> assertTrue(m.ignoreMalformed()));
113+
checker.registerUpdateCheck(b -> b.field("coerce", false), m -> assertFalse(((ExponentialHistogramFieldMapper) m).coerce()));
114+
}
115+
116+
public void testCoerce() throws IOException {
117+
List<Double> centroids = randomDoubles().map(val -> val * 1_000_000 - 500_000)
118+
.map(val -> randomBoolean() ? val : 0)
119+
.distinct()
120+
.limit(randomIntBetween(0, 100))
121+
.sorted()
122+
.boxed()
123+
.toList();
124+
List<Long> counts = IntStream.range(0, centroids.size()).mapToLong(i -> randomIntBetween(0, 100)).boxed().toList();
125+
126+
HistogramParser.ParsedHistogram input = new HistogramParser.ParsedHistogram(centroids, counts);
127+
128+
XContentBuilder inputJson = XContentFactory.jsonBuilder();
129+
inputJson.startObject()
130+
.field("field")
131+
.startObject()
132+
.array("values", centroids.toArray())
133+
.array("counts", counts.toArray())
134+
.endObject()
135+
.endObject();
136+
BytesReference inputDocBytes = BytesReference.bytes(inputJson);
137+
138+
ExponentialHistogramParser.ParsedExponentialHistogram expectedCoerced = ParsedHistogramConverter.tDigestToExponential(input);
139+
140+
DocumentMapper defaultMapper = createDocumentMapper(fieldMapping(this::minimalMapping));
141+
142+
ParsedDocument doc = defaultMapper.parse(new SourceToParse("1", inputDocBytes, XContentType.JSON));
143+
ExponentialHistogramParser.ParsedExponentialHistogram ingestedHisto = docValueToParsedHistogram(doc, "field");
144+
assertThat(ingestedHisto, equalTo(expectedCoerced));
145+
146+
DocumentMapper coerceDisabledMapper = createDocumentMapper(
147+
fieldMapping(b -> b.field("type", "exponential_histogram").field("coerce", false))
148+
);
149+
ThrowingRunnable runnable = () -> coerceDisabledMapper.parse(new SourceToParse("1", inputDocBytes, XContentType.JSON));
150+
DocumentParsingException e = expectThrows(DocumentParsingException.class, runnable);
151+
assertThat(e.getCause().getMessage(), containsString("unknown parameter [values]"));
152+
}
153+
154+
private static IndexableField getSingleField(ParsedDocument doc, String fieldName) {
155+
List<IndexableField> fields = doc.rootDoc().getFields(fieldName);
156+
assertThat(fields.size(), equalTo(1));
157+
return fields.getFirst();
158+
}
159+
160+
private static ExponentialHistogramParser.ParsedExponentialHistogram docValueToParsedHistogram(ParsedDocument doc, String fieldName) {
161+
BytesRef encodedBytes = getSingleField(doc, fieldName).binaryValue();
162+
long valueCount = getSingleField(doc, ExponentialHistogramFieldMapper.valuesCountSubFieldName(fieldName)).numericValue()
163+
.longValue();
164+
double zeroThreshold = NumericUtils.sortableLongToDouble(
165+
getSingleField(doc, ExponentialHistogramFieldMapper.zeroThresholdSubFieldName(fieldName)).numericValue().longValue()
166+
);
167+
168+
// min max and sum are not relevant for these tests, so we use fake ones
169+
double min = valueCount == 0 ? Double.NaN : 0.0;
170+
double max = valueCount == 0 ? Double.NaN : 0.0;
171+
double sum = 0;
172+
173+
CompressedExponentialHistogram histogram = new CompressedExponentialHistogram();
174+
try {
175+
histogram.reset(zeroThreshold, valueCount, sum, min, max, encodedBytes);
176+
} catch (IOException e) {
177+
throw new RuntimeException(e);
178+
}
179+
180+
return new ExponentialHistogramParser.ParsedExponentialHistogram(
181+
histogram.scale(),
182+
histogram.zeroBucket().zeroThreshold(),
183+
histogram.zeroBucket().count(),
184+
IndexWithCount.fromIterator(histogram.negativeBuckets().iterator()),
185+
IndexWithCount.fromIterator(histogram.positiveBuckets().iterator()),
186+
null,
187+
null,
188+
null
189+
);
101190
}
102191

103192
@Override

0 commit comments

Comments
 (0)