Skip to content

Commit 05af243

Browse files
authored
Memory efficient xcontent filtering (backport of #77154) (#77653)
* Memory efficient xcontent filtering (backport of #77154) I found myself needing support for something like `filter_path` on `XContentParser`. It was simple enough to plug it in so I did. Then I realized that it might offer more memory efficient source filtering (#25168) so I put together a quick benchmark comparing the source filtering that we do in `_search`. Filtering using the parser is about 33% faster than how we filter now when you select a single field from a 300 byte document: ``` Benchmark (excludes) (includes) (source) Mode Cnt Score Error Units FetchSourcePhaseBenchmark.filterObjects message short avgt 5 2360.342 ± 4.715 ns/op FetchSourcePhaseBenchmark.filterXContentOnBuilder message short avgt 5 2010.278 ± 15.042 ns/op FetchSourcePhaseBenchmark.filterXContentOnParser message short avgt 5 1588.446 ± 18.593 ns/op ``` The top line is the way we filter now. The middle line is adding a filter to `XContentBuilder` - something we can do right now without any of my plumbing work. The bottom line is filtering on the parser, requiring all the new plumbing. This isn't particularly impressive. 33% *sounds* great! But 700 nanoseconds per document isn't going to cut into anyone's search times. If you fetch a thousand documents that's .7 milliseconds of savings. But we mostly advise folks to use source filtering on fetch when the source is large and you only want a small part of it. So I tried when the source is about 4.3kb and you want a single field: ``` Benchmark (excludes) (includes) (source) Mode Cnt Score Error Units FetchSourcePhaseBenchmark.filterObjects message one_4k_field avgt 5 5957.128 ± 117.402 ns/op FetchSourcePhaseBenchmark.filterXContentOnBuilder message one_4k_field avgt 5 4999.073 ± 96.003 ns/op FetchSourcePhaseBenchmark.filterXContentonParser message one_4k_field avgt 5 3261.478 ± 48.879 ns/op ``` That's 45% faster. Put another way, 2.7 microseconds a document. Not bad! 
But have a look at how things come out when you want a single field from a 4 *megabyte* document: ``` Benchmark (excludes) (includes) (source) Mode Cnt Score Error Units FetchSourcePhaseBenchmark.filterObjects message one_4m_field avgt 5 8266343.036 ± 176197.077 ns/op FetchSourcePhaseBenchmark.filterXContentOnBuilder message one_4m_field avgt 5 6227560.013 ± 68306.318 ns/op FetchSourcePhaseBenchmark.filterXContentonParser message one_4m_field avgt 5 1617153.472 ± 80164.547 ns/op ``` These documents are very large. I've encountered documents like them in real life, but they've always been the outlier for me. But a 6.5 millisecond per document savings ain't anything to sneeze at. Take a look at what you get when I turn on gc metrics: ``` FetchSourcePhaseBenchmark.filterObjects message one_4m_field avgt 5 7036097.561 ± 84721.312 ns/op FetchSourcePhaseBenchmark.filterObjects:·gc.alloc.rate message one_4m_field avgt 5 2166.613 ± 25.975 MB/sec FetchSourcePhaseBenchmark.filterXContentOnBuilder message one_4m_field avgt 5 6104595.992 ± 55445.508 ns/op FetchSourcePhaseBenchmark.filterXContentOnBuilder:·gc.alloc.rate message one_4m_field avgt 5 2496.978 ± 22.650 MB/sec FetchSourcePhaseBenchmark.filterXContentonParser message one_4m_field avgt 5 1614980.846 ± 31716.956 ns/op FetchSourcePhaseBenchmark.filterXContentonParser:·gc.alloc.rate message one_4m_field avgt 5 1.755 ± 0.035 MB/sec ``` * Fixup benchmark for 7.x
1 parent d9be5e9 commit 05af243

File tree

15 files changed

+388
-18
lines changed

15 files changed

+388
-18
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
package org.elasticsearch.benchmark.search.fetch.subphase;
2+
3+
import org.elasticsearch.common.Strings;
4+
import org.elasticsearch.common.bytes.BytesArray;
5+
import org.elasticsearch.common.bytes.BytesReference;
6+
import org.elasticsearch.common.io.Streams;
7+
import org.elasticsearch.common.io.stream.BytesStreamOutput;
8+
import org.elasticsearch.common.xcontent.DeprecationHandler;
9+
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
10+
import org.elasticsearch.common.xcontent.XContentBuilder;
11+
import org.elasticsearch.common.xcontent.XContentParser;
12+
import org.elasticsearch.common.xcontent.XContentType;
13+
import org.elasticsearch.common.xcontent.support.filtering.FilterPath;
14+
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
15+
import org.elasticsearch.search.fetch.subphase.FetchSourcePhase;
16+
import org.elasticsearch.search.lookup.SourceLookup;
17+
import org.openjdk.jmh.annotations.Benchmark;
18+
import org.openjdk.jmh.annotations.BenchmarkMode;
19+
import org.openjdk.jmh.annotations.Fork;
20+
import org.openjdk.jmh.annotations.Measurement;
21+
import org.openjdk.jmh.annotations.Mode;
22+
import org.openjdk.jmh.annotations.OutputTimeUnit;
23+
import org.openjdk.jmh.annotations.Param;
24+
import org.openjdk.jmh.annotations.Scope;
25+
import org.openjdk.jmh.annotations.Setup;
26+
import org.openjdk.jmh.annotations.State;
27+
import org.openjdk.jmh.annotations.Warmup;
28+
29+
import java.io.IOException;
30+
import java.util.Collections;
31+
import java.util.Set;
32+
import java.util.concurrent.TimeUnit;
33+
34+
@Fork(1)
35+
@Warmup(iterations = 5)
36+
@Measurement(iterations = 5)
37+
@BenchmarkMode(Mode.AverageTime)
38+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
39+
@State(Scope.Benchmark)
40+
public class FetchSourcePhaseBenchmark {
41+
private BytesReference sourceBytes;
42+
private FetchSourceContext fetchContext;
43+
private Set<String> includesSet;
44+
private Set<String> excludesSet;
45+
private FilterPath[] includesFilters;
46+
private FilterPath[] excludesFilters;
47+
48+
@Param({ "tiny", "short", "one_4k_field", "one_4m_field" })
49+
private String source;
50+
@Param({ "message" })
51+
private String includes;
52+
@Param({ "" })
53+
private String excludes;
54+
55+
@Setup
56+
public void setup() throws IOException {
57+
switch (source) {
58+
case "tiny":
59+
sourceBytes = new BytesArray("{\"message\": \"short\"}");
60+
break;
61+
case "short":
62+
sourceBytes = read300BytesExample();
63+
break;
64+
case "one_4k_field":
65+
sourceBytes = buildBigExample(String.join("", Collections.nCopies(1024, "huge")));
66+
break;
67+
case "one_4m_field":
68+
sourceBytes = buildBigExample(String.join("", Collections.nCopies(1024 * 1024, "huge")));
69+
break;
70+
default:
71+
throw new IllegalArgumentException("Unknown source [" + source + "]");
72+
}
73+
fetchContext = new FetchSourceContext(
74+
true,
75+
Strings.splitStringByCommaToArray(includes),
76+
Strings.splitStringByCommaToArray(excludes)
77+
);
78+
includesSet = org.elasticsearch.core.Set.of(fetchContext.includes());
79+
excludesSet = org.elasticsearch.core.Set.of(fetchContext.excludes());
80+
includesFilters = FilterPath.compile(includesSet);
81+
excludesFilters = FilterPath.compile(excludesSet);
82+
}
83+
84+
private BytesReference read300BytesExample() throws IOException {
85+
return Streams.readFully(FetchSourcePhaseBenchmark.class.getResourceAsStream("300b_example.json"));
86+
}
87+
88+
private BytesReference buildBigExample(String extraText) throws IOException {
89+
String bigger = read300BytesExample().utf8ToString();
90+
bigger = "{\"huge\": \"" + extraText + "\"," + bigger.substring(1);
91+
return new BytesArray(bigger);
92+
}
93+
94+
@Benchmark
95+
public BytesReference filterObjects() throws IOException {
96+
SourceLookup lookup = new SourceLookup();
97+
lookup.setSource(sourceBytes);
98+
Object value = lookup.filter(fetchContext);
99+
return FetchSourcePhase.objectToBytes(value, XContentType.JSON, Math.min(1024, lookup.internalSourceRef().length()));
100+
}
101+
102+
@Benchmark
103+
public BytesReference filterXContentOnParser() throws IOException {
104+
BytesStreamOutput streamOutput = new BytesStreamOutput(Math.min(1024, sourceBytes.length()));
105+
XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), streamOutput);
106+
try (
107+
XContentParser parser = XContentType.JSON.xContent()
108+
.createParser(
109+
NamedXContentRegistry.EMPTY,
110+
DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
111+
sourceBytes.streamInput(),
112+
includesFilters,
113+
excludesFilters
114+
)
115+
) {
116+
builder.copyCurrentStructure(parser);
117+
return BytesReference.bytes(builder);
118+
}
119+
}
120+
121+
@Benchmark
122+
public BytesReference filterXContentOnBuilder() throws IOException {
123+
BytesStreamOutput streamOutput = new BytesStreamOutput(Math.min(1024, sourceBytes.length()));
124+
XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), streamOutput, includesSet, excludesSet);
125+
try (
126+
XContentParser parser = XContentType.JSON.xContent()
127+
.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, sourceBytes.streamInput())
128+
) {
129+
builder.copyCurrentStructure(parser);
130+
return BytesReference.bytes(builder);
131+
}
132+
}
133+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"@timestamp": "2099-11-15T14:12:12",
3+
"http": {
4+
"request": {
5+
"method": "get"
6+
},
7+
"response": {
8+
"bytes": 1070000,
9+
"status_code": 200
10+
},
11+
"version": "1.1"
12+
},
13+
"message": "GET /search HTTP/1.1 200 1070000",
14+
"source": {
15+
"ip": "192.168.0.1"
16+
},
17+
"user": {
18+
"id": "user"
19+
}
20+
}

libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContent.java

+13
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
package org.elasticsearch.common.xcontent;
1010

11+
import org.elasticsearch.common.xcontent.support.filtering.FilterPath;
12+
1113
import java.io.IOException;
1214
import java.io.InputStream;
1315
import java.io.OutputStream;
@@ -56,6 +58,17 @@ XContentParser createParser(NamedXContentRegistry xContentRegistry,
5658
XContentParser createParser(NamedXContentRegistry xContentRegistry,
5759
DeprecationHandler deprecationHandler, InputStream is) throws IOException;
5860

61+
/**
62+
* Creates a parser over the provided input stream whose content is filtered by the given {@code includes} and {@code excludes} paths.
63+
*/
64+
XContentParser createParser(
65+
NamedXContentRegistry xContentRegistry,
66+
DeprecationHandler deprecationHandler,
67+
InputStream is,
68+
FilterPath[] includes,
69+
FilterPath[] excludes
70+
) throws IOException;
71+
5972
/**
6073
* Creates a parser over the provided bytes.
6174
*/

libs/x-content/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContent.java

+19
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import com.fasterxml.jackson.core.JsonGenerator;
1313
import com.fasterxml.jackson.core.JsonParser;
1414
import com.fasterxml.jackson.dataformat.cbor.CBORFactory;
15+
1516
import org.elasticsearch.common.xcontent.DeprecationHandler;
1617
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
1718
import org.elasticsearch.common.xcontent.XContent;
@@ -20,6 +21,7 @@
2021
import org.elasticsearch.common.xcontent.XContentParseException;
2122
import org.elasticsearch.common.xcontent.XContentParser;
2223
import org.elasticsearch.common.xcontent.XContentType;
24+
import org.elasticsearch.common.xcontent.support.filtering.FilterPath;
2325

2426
import java.io.ByteArrayInputStream;
2527
import java.io.IOException;
@@ -79,6 +81,23 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry,
7981
return new CborXContentParser(xContentRegistry, deprecationHandler, cborFactory.createParser(is));
8082
}
8183

84+
@Override
85+
public XContentParser createParser(
86+
NamedXContentRegistry xContentRegistry,
87+
DeprecationHandler deprecationHandler,
88+
InputStream is,
89+
FilterPath[] includes,
90+
FilterPath[] excludes
91+
) throws IOException {
92+
return new CborXContentParser(
93+
xContentRegistry,
94+
deprecationHandler,
95+
cborFactory.createParser(is),
96+
includes,
97+
excludes
98+
);
99+
}
100+
82101
@Override
83102
public XContentParser createParser(NamedXContentRegistry xContentRegistry,
84103
DeprecationHandler deprecationHandler, byte[] data) throws IOException {

libs/x-content/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContentParser.java

+12
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
99
package org.elasticsearch.common.xcontent.cbor;
1010

1111
import com.fasterxml.jackson.core.JsonParser;
12+
1213
import org.elasticsearch.common.xcontent.DeprecationHandler;
1314
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
1415
import org.elasticsearch.common.xcontent.XContentType;
1516
import org.elasticsearch.common.xcontent.json.JsonXContentParser;
17+
import org.elasticsearch.common.xcontent.support.filtering.FilterPath;
1618

1719
public class CborXContentParser extends JsonXContentParser {
1820

@@ -21,6 +23,16 @@ public CborXContentParser(NamedXContentRegistry xContentRegistry,
2123
super(xContentRegistry, deprecationHandler, parser);
2224
}
2325

26+
/**
 * Creates a CBOR parser by passing every argument, including the {@code includes} and
 * {@code excludes} filter paths, unchanged to the matching superclass constructor.
 */
public CborXContentParser(
27+
NamedXContentRegistry xContentRegistry,
28+
DeprecationHandler deprecationHandler,
29+
JsonParser parser,
30+
FilterPath[] includes,
31+
FilterPath[] excludes
32+
) {
33+
super(xContentRegistry, deprecationHandler, parser, includes, excludes);
34+
}
35+
2436
@Override
2537
public XContentType contentType() {
2638
return XContentType.CBOR;

libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContent.java

+19
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,15 @@
1212
import com.fasterxml.jackson.core.JsonFactory;
1313
import com.fasterxml.jackson.core.JsonGenerator;
1414
import com.fasterxml.jackson.core.JsonParser;
15+
1516
import org.elasticsearch.common.xcontent.DeprecationHandler;
1617
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
1718
import org.elasticsearch.common.xcontent.XContent;
1819
import org.elasticsearch.common.xcontent.XContentBuilder;
1920
import org.elasticsearch.common.xcontent.XContentGenerator;
2021
import org.elasticsearch.common.xcontent.XContentParser;
2122
import org.elasticsearch.common.xcontent.XContentType;
23+
import org.elasticsearch.common.xcontent.support.filtering.FilterPath;
2224

2325
import java.io.ByteArrayInputStream;
2426
import java.io.IOException;
@@ -80,6 +82,23 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry,
8082
return new JsonXContentParser(xContentRegistry, deprecationHandler, jsonFactory.createParser(is));
8183
}
8284

85+
@Override
86+
public XContentParser createParser(
87+
NamedXContentRegistry xContentRegistry,
88+
DeprecationHandler deprecationHandler,
89+
InputStream is,
90+
FilterPath[] include,
91+
FilterPath[] exclude
92+
) throws IOException {
93+
return new JsonXContentParser(
94+
xContentRegistry,
95+
deprecationHandler,
96+
jsonFactory.createParser(is),
97+
include,
98+
exclude
99+
);
100+
}
101+
83102
@Override
84103
public XContentParser createParser(NamedXContentRegistry xContentRegistry,
85104
DeprecationHandler deprecationHandler, byte[] data) throws IOException {

libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java

+28
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,15 @@
1111
import com.fasterxml.jackson.core.JsonLocation;
1212
import com.fasterxml.jackson.core.JsonParser;
1313
import com.fasterxml.jackson.core.JsonToken;
14+
import com.fasterxml.jackson.core.filter.FilteringParserDelegate;
15+
1416
import org.elasticsearch.common.xcontent.DeprecationHandler;
1517
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
1618
import org.elasticsearch.common.xcontent.XContentLocation;
1719
import org.elasticsearch.common.xcontent.XContentType;
1820
import org.elasticsearch.common.xcontent.support.AbstractXContentParser;
21+
import org.elasticsearch.common.xcontent.support.filtering.FilterPath;
22+
import org.elasticsearch.common.xcontent.support.filtering.FilterPathBasedFilter;
1923
import org.elasticsearch.core.internal.io.IOUtils;
2024

2125
import java.io.IOException;
@@ -31,6 +35,30 @@ public JsonXContentParser(NamedXContentRegistry xContentRegistry,
3135
this.parser = parser;
3236
}
3337

38+
public JsonXContentParser(
39+
NamedXContentRegistry xContentRegistry,
40+
DeprecationHandler deprecationHandler,
41+
JsonParser parser,
42+
FilterPath[] include,
43+
FilterPath[] exclude
44+
) {
45+
super(xContentRegistry, deprecationHandler);
46+
JsonParser filtered = parser;
47+
if (exclude != null) {
48+
for (FilterPath e : exclude) {
49+
if (e.hasDoubleWildcard()) {
50+
// Fixed in Jackson 2.13 - https://github.com/FasterXML/jackson-core/issues/700
51+
throw new UnsupportedOperationException("double wildcards are not supported in filtered excludes");
52+
}
53+
}
54+
filtered = new FilteringParserDelegate(filtered, new FilterPathBasedFilter(exclude, false), true, true);
55+
}
56+
if (include != null) {
57+
filtered = new FilteringParserDelegate(filtered, new FilterPathBasedFilter(include, true), true, true);
58+
}
59+
this.parser = filtered;
60+
}
61+
3462
@Override
3563
public XContentType contentType() {
3664
return XContentType.JSON;

libs/x-content/src/main/java/org/elasticsearch/common/xcontent/smile/SmileXContent.java

+19
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,15 @@
1313
import com.fasterxml.jackson.core.JsonParser;
1414
import com.fasterxml.jackson.dataformat.smile.SmileFactory;
1515
import com.fasterxml.jackson.dataformat.smile.SmileGenerator;
16+
1617
import org.elasticsearch.common.xcontent.DeprecationHandler;
1718
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
1819
import org.elasticsearch.common.xcontent.XContent;
1920
import org.elasticsearch.common.xcontent.XContentBuilder;
2021
import org.elasticsearch.common.xcontent.XContentGenerator;
2122
import org.elasticsearch.common.xcontent.XContentParser;
2223
import org.elasticsearch.common.xcontent.XContentType;
24+
import org.elasticsearch.common.xcontent.support.filtering.FilterPath;
2325

2426
import java.io.ByteArrayInputStream;
2527
import java.io.IOException;
@@ -81,6 +83,23 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry,
8183
return new SmileXContentParser(xContentRegistry, deprecationHandler, smileFactory.createParser(is));
8284
}
8385

86+
@Override
87+
public XContentParser createParser(
88+
NamedXContentRegistry xContentRegistry,
89+
DeprecationHandler deprecationHandler,
90+
InputStream is,
91+
FilterPath[] include,
92+
FilterPath[] exclude
93+
) throws IOException {
94+
return new SmileXContentParser(
95+
xContentRegistry,
96+
deprecationHandler,
97+
smileFactory.createParser(is),
98+
include,
99+
exclude
100+
);
101+
}
102+
84103
@Override
85104
public XContentParser createParser(NamedXContentRegistry xContentRegistry,
86105
DeprecationHandler deprecationHandler, byte[] data) throws IOException {

0 commit comments

Comments
 (0)