Skip to content

Commit 129dd13

Browse files
authored
[8.x] Use FallbackSyntheticSourceBlockLoader for point and geo_point (#125816) (#126079)
* Use FallbackSyntheticSourceBlockLoader for point and geo_point (#125816) (cherry picked from commit f3ccde6) # Conflicts: # server/src/test/java/org/elasticsearch/index/mapper/blockloader/BooleanFieldBlockLoaderTests.java # test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java # test/framework/src/main/java/org/elasticsearch/index/mapper/NumberFieldBlockLoaderTestCase.java * compilation
1 parent 74f9aca commit 129dd13

27 files changed

+867
-87
lines changed

docs/changelog/125816.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 125816
2+
summary: Use `FallbackSyntheticSourceBlockLoader` for point and `geo_point`
3+
area: Mapping
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/mapper/AbstractPointGeometryFieldMapper.java

-11
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@
2222
import java.util.function.Function;
2323
import java.util.function.Supplier;
2424

25-
import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES;
26-
2725
/** Base class for spatial fields that only support indexing points */
2826
public abstract class AbstractPointGeometryFieldMapper<T> extends AbstractGeometryFieldMapper<T> {
2927

@@ -148,7 +146,6 @@ protected void parseAndConsumeFromObject(
148146
}
149147

150148
public abstract static class AbstractPointFieldType<T extends SpatialPoint> extends AbstractGeometryFieldType<T> {
151-
152149
protected AbstractPointFieldType(
153150
String name,
154151
boolean indexed,
@@ -165,13 +162,5 @@ protected AbstractPointFieldType(
165162
protected Object nullValueAsSource(T nullValue) {
166163
return nullValue == null ? null : nullValue.toWKT();
167164
}
168-
169-
@Override
170-
public BlockLoader blockLoader(BlockLoaderContext blContext) {
171-
if (blContext.fieldExtractPreference() == DOC_VALUES && hasDocValues()) {
172-
return new BlockDocValuesReader.LongsBlockLoader(name());
173-
}
174-
return blockLoaderFromSource(blContext);
175-
}
176165
}
177166
}

server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java

+31-3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@
6767
import java.util.Set;
6868
import java.util.function.Function;
6969

70+
import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES;
71+
7072
/**
7173
* Field Mapper for geo_point types.
7274
*
@@ -224,7 +226,8 @@ public FieldMapper build(MapperBuilderContext context) {
224226
scriptValues(),
225227
meta.get(),
226228
metric.get(),
227-
indexMode
229+
indexMode,
230+
context.isSourceSynthetic()
228231
);
229232
hasScript = script.get() != null;
230233
onScriptError = onScriptErrorParam.get();
@@ -373,6 +376,7 @@ public static class GeoPointFieldType extends AbstractPointFieldType<GeoPoint> i
373376

374377
private final FieldValues<GeoPoint> scriptValues;
375378
private final IndexMode indexMode;
379+
private final boolean isSyntheticSource;
376380

377381
private GeoPointFieldType(
378382
String name,
@@ -384,17 +388,19 @@ private GeoPointFieldType(
384388
FieldValues<GeoPoint> scriptValues,
385389
Map<String, String> meta,
386390
TimeSeriesParams.MetricType metricType,
387-
IndexMode indexMode
391+
IndexMode indexMode,
392+
boolean isSyntheticSource
388393
) {
389394
super(name, indexed, stored, hasDocValues, parser, nullValue, meta);
390395
this.scriptValues = scriptValues;
391396
this.metricType = metricType;
392397
this.indexMode = indexMode;
398+
this.isSyntheticSource = isSyntheticSource;
393399
}
394400

395401
// only used in test
396402
public GeoPointFieldType(String name, TimeSeriesParams.MetricType metricType, IndexMode indexMode) {
397-
this(name, true, false, true, null, null, null, Collections.emptyMap(), metricType, indexMode);
403+
this(name, true, false, true, null, null, null, Collections.emptyMap(), metricType, indexMode, false);
398404
}
399405

400406
// only used in test
@@ -527,6 +533,28 @@ public Query distanceFeatureQuery(Object origin, String pivot, SearchExecutionCo
527533
public TimeSeriesParams.MetricType getMetricType() {
528534
return metricType;
529535
}
536+
537+
@Override
538+
public BlockLoader blockLoader(BlockLoaderContext blContext) {
539+
if (blContext.fieldExtractPreference() == DOC_VALUES && hasDocValues()) {
540+
return new BlockDocValuesReader.LongsBlockLoader(name());
541+
}
542+
543+
// There are two scenarios possible once we arrive here:
544+
//
545+
// * Stored source - we'll just use blockLoaderFromSource
546+
// * Synthetic source. However, because of the fieldExtractPreference() check above it is still possible that doc_values are
547+
// present here.
548+
// So we have two subcases:
549+
// - doc_values are enabled - _ignored_source field does not exist since we have doc_values. We will use
550+
// blockLoaderFromSource which reads "native" synthetic source.
551+
// - doc_values are disabled - we know that _ignored_source field is present and use a special block loader.
552+
if (isSyntheticSource && hasDocValues() == false) {
553+
return blockLoaderFromFallbackSyntheticSource(blContext);
554+
}
555+
556+
return blockLoaderFromSource(blContext);
557+
}
530558
}
531559

532560
/** GeoPoint parser implementation */

server/src/test/java/org/elasticsearch/index/mapper/blockloader/BooleanFieldBlockLoaderTests.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ public BooleanFieldBlockLoaderTests(Params params) {
2323

2424
@Override
2525
@SuppressWarnings("unchecked")
26-
protected Object expected(Map<String, Object> fieldMapping, Object value) {
26+
protected Object expected(Map<String, Object> fieldMapping, Object value, TestContext testContext) {
2727
var rawNullValue = fieldMapping.get("null_value");
2828

2929
Boolean nullValue;

server/src/test/java/org/elasticsearch/index/mapper/blockloader/DateFieldBlockLoaderTests.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public DateFieldBlockLoaderTests(Params params) {
2929

3030
@Override
3131
@SuppressWarnings("unchecked")
32-
protected Object expected(Map<String, Object> fieldMapping, Object value) {
32+
protected Object expected(Map<String, Object> fieldMapping, Object value, TestContext testContext) {
3333
var format = (String) fieldMapping.get("format");
3434
var nullValue = fieldMapping.get("null_value") != null ? format(fieldMapping.get("null_value"), format) : null;
3535

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.mapper.blockloader;
11+
12+
import org.apache.lucene.util.BytesRef;
13+
import org.elasticsearch.common.geo.GeoPoint;
14+
import org.elasticsearch.geometry.Point;
15+
import org.elasticsearch.geometry.utils.WellKnownBinary;
16+
import org.elasticsearch.index.mapper.BlockLoaderTestCase;
17+
import org.elasticsearch.index.mapper.MappedFieldType;
18+
19+
import java.nio.ByteOrder;
20+
import java.util.ArrayList;
21+
import java.util.Comparator;
22+
import java.util.List;
23+
import java.util.Map;
24+
import java.util.Objects;
25+
26+
public class GeoPointFieldBlockLoaderTests extends BlockLoaderTestCase {
27+
public GeoPointFieldBlockLoaderTests(BlockLoaderTestCase.Params params) {
28+
super("geo_point", params);
29+
}
30+
31+
@Override
32+
@SuppressWarnings("unchecked")
33+
protected Object expected(Map<String, Object> fieldMapping, Object value, TestContext testContext) {
34+
var extractedFieldValues = (ExtractedFieldValues) value;
35+
var values = extractedFieldValues.values();
36+
37+
var rawNullValue = fieldMapping.get("null_value");
38+
39+
GeoPoint nullValue;
40+
if (rawNullValue == null) {
41+
nullValue = null;
42+
} else if (rawNullValue instanceof String s) {
43+
nullValue = convert(s, null);
44+
} else {
45+
throw new IllegalStateException("Unexpected null_value format");
46+
}
47+
48+
if (params.preference() == MappedFieldType.FieldExtractPreference.DOC_VALUES && hasDocValues(fieldMapping, true)) {
49+
if (values instanceof List<?> == false) {
50+
var point = convert(values, nullValue);
51+
return point != null ? point.getEncoded() : null;
52+
}
53+
54+
var resultList = ((List<Object>) values).stream()
55+
.map(v -> convert(v, nullValue))
56+
.filter(Objects::nonNull)
57+
.map(GeoPoint::getEncoded)
58+
.sorted()
59+
.toList();
60+
return maybeFoldList(resultList);
61+
}
62+
63+
if (params.syntheticSource() == false) {
64+
return exactValuesFromSource(values, nullValue);
65+
}
66+
67+
// Usually implementation of block loader from source adjusts values read from source
68+
// so that they look the same as doc_values would (like reducing precision).
69+
// geo_point does not do that and because of that we need to handle all these cases below.
70+
// If we are reading from stored source or fallback synthetic source we get the same exact data as source.
71+
// But if we are using "normal" synthetic source we get lesser precision data from doc_values.
72+
// That is unless "synthetic_source_keep" forces fallback synthetic source again.
73+
74+
if (testContext.forceFallbackSyntheticSource()) {
75+
return exactValuesFromSource(values, nullValue);
76+
}
77+
78+
String syntheticSourceKeep = (String) fieldMapping.getOrDefault("synthetic_source_keep", "none");
79+
if (syntheticSourceKeep.equals("all")) {
80+
return exactValuesFromSource(values, nullValue);
81+
}
82+
if (syntheticSourceKeep.equals("arrays") && extractedFieldValues.documentHasObjectArrays()) {
83+
return exactValuesFromSource(values, nullValue);
84+
}
85+
86+
// synthetic source and doc_values are present
87+
if (hasDocValues(fieldMapping, true)) {
88+
if (values instanceof List<?> == false) {
89+
return toWKB(normalize(convert(values, nullValue)));
90+
}
91+
92+
var resultList = ((List<Object>) values).stream()
93+
.map(v -> convert(v, nullValue))
94+
.filter(Objects::nonNull)
95+
.sorted(Comparator.comparingLong(GeoPoint::getEncoded))
96+
.map(p -> toWKB(normalize(p)))
97+
.toList();
98+
return maybeFoldList(resultList);
99+
}
100+
101+
// synthetic source but no doc_values so using fallback synthetic source
102+
return exactValuesFromSource(values, nullValue);
103+
}
104+
105+
@SuppressWarnings("unchecked")
106+
private Object exactValuesFromSource(Object value, GeoPoint nullValue) {
107+
if (value instanceof List<?> == false) {
108+
return toWKB(convert(value, nullValue));
109+
}
110+
111+
var resultList = ((List<Object>) value).stream().map(v -> convert(v, nullValue)).filter(Objects::nonNull).map(this::toWKB).toList();
112+
return maybeFoldList(resultList);
113+
}
114+
115+
private record ExtractedFieldValues(Object values, boolean documentHasObjectArrays) {}
116+
117+
@Override
118+
protected Object getFieldValue(Map<String, Object> document, String fieldName) {
119+
var extracted = new ArrayList<>();
120+
var documentHasObjectArrays = processLevel(document, fieldName, extracted, false);
121+
122+
if (extracted.size() == 1) {
123+
return new ExtractedFieldValues(extracted.get(0), documentHasObjectArrays);
124+
}
125+
126+
return new ExtractedFieldValues(extracted, documentHasObjectArrays);
127+
}
128+
129+
@SuppressWarnings("unchecked")
130+
private boolean processLevel(Map<String, Object> level, String field, ArrayList<Object> extracted, boolean documentHasObjectArrays) {
131+
if (field.contains(".") == false) {
132+
var value = level.get(field);
133+
processLeafLevel(value, extracted);
134+
return documentHasObjectArrays;
135+
}
136+
137+
var nameInLevel = field.split("\\.")[0];
138+
var entry = level.get(nameInLevel);
139+
if (entry instanceof Map<?, ?> m) {
140+
return processLevel((Map<String, Object>) m, field.substring(field.indexOf('.') + 1), extracted, documentHasObjectArrays);
141+
}
142+
if (entry instanceof List<?> l) {
143+
for (var object : l) {
144+
processLevel((Map<String, Object>) object, field.substring(field.indexOf('.') + 1), extracted, true);
145+
}
146+
return true;
147+
}
148+
149+
assert false : "unexpected document structure";
150+
return false;
151+
}
152+
153+
private void processLeafLevel(Object value, ArrayList<Object> extracted) {
154+
if (value instanceof List<?> l) {
155+
if (l.size() > 0 && l.get(0) instanceof Double) {
156+
// this must be a single point in array form
157+
// we'll put it into a different form here to make our lives a bit easier while implementing `expected`
158+
extracted.add(Map.of("type", "point", "coordinates", l));
159+
} else {
160+
// this is actually an array of points but there could still be points in array form inside
161+
for (var arrayValue : l) {
162+
processLeafLevel(arrayValue, extracted);
163+
}
164+
}
165+
} else {
166+
extracted.add(value);
167+
}
168+
}
169+
170+
@SuppressWarnings("unchecked")
171+
private GeoPoint convert(Object value, GeoPoint nullValue) {
172+
if (value == null) {
173+
return nullValue;
174+
}
175+
176+
if (value instanceof String s) {
177+
try {
178+
return new GeoPoint(s);
179+
} catch (Exception e) {
180+
return null;
181+
}
182+
}
183+
184+
if (value instanceof Map<?, ?> m) {
185+
if (m.get("type") != null) {
186+
var coordinates = (List<Double>) m.get("coordinates");
187+
// Order is GeoJSON is lon,lat
188+
return new GeoPoint(coordinates.get(1), coordinates.get(0));
189+
} else {
190+
return new GeoPoint((Double) m.get("lat"), (Double) m.get("lon"));
191+
}
192+
}
193+
194+
// Malformed values are excluded
195+
return null;
196+
}
197+
198+
private GeoPoint normalize(GeoPoint point) {
199+
if (point == null) {
200+
return null;
201+
}
202+
return point.resetFromEncoded(point.getEncoded());
203+
}
204+
205+
private BytesRef toWKB(GeoPoint point) {
206+
if (point == null) {
207+
return null;
208+
}
209+
210+
return new BytesRef(WellKnownBinary.toWKB(new Point(point.getX(), point.getY()), ByteOrder.LITTLE_ENDIAN));
211+
}
212+
}

server/src/test/java/org/elasticsearch/index/mapper/blockloader/KeywordFieldBlockLoaderTests.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public KeywordFieldBlockLoaderTests(Params params) {
2626

2727
@SuppressWarnings("unchecked")
2828
@Override
29-
protected Object expected(Map<String, Object> fieldMapping, Object value) {
29+
protected Object expected(Map<String, Object> fieldMapping, Object value, TestContext testContext) {
3030
var nullValue = (String) fieldMapping.get("null_value");
3131

3232
var ignoreAbove = fieldMapping.get("ignore_above") == null

0 commit comments

Comments
 (0)