-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Add binary doc value compression with variable doc count blocks #137139
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 10 commits
74880a0
a973713
3fc95dc
c302cc2
99748c8
fa2ea11
b67dd58
638dbbc
fdf3428
36b3e10
eeded36
2d8e6dc
efa270f
c4d67e5
06a2035
7ccb18d
a57e0d4
f156e55
91e5842
401a041
ad55bc3
4d4e153
f1ff182
9d2f237
2269f9c
1c4e9dc
3ddb649
dbcd1c6
5537d8c
d7fce75
bb8361c
aa3d44f
2c1f143
636c150
09898ff
8b8b50b
8a82c23
cef255f
718ffc6
ebda5b0
9fc23f1
49e5425
80525bf
b1d4b17
e332619
60ebfaa
1209e78
80c14a3
602c203
d6293d9
982386e
e61b8c2
a225b98
f6fd5bd
5fe2c80
51b21ae
07eeb5a
d56d12f
980df97
21a98ac
2239732
15823e8
25dcb56
200e14c
5ca24b4
7f8fa16
91c23ee
92c8050
016352a
8a2af81
026406b
d27bb8b
db68af6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
|
|
||
| package org.elasticsearch.index.codec.tsdb; | ||
|
|
||
| public enum BinaryDVCompressionMode { | ||
parkertimmins marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| NO_COMPRESS((byte) 0), | ||
| COMPRESSED_WITH_ZSTD((byte) 1); | ||
parkertimmins marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| public final byte code; | ||
|
|
||
| BinaryDVCompressionMode(byte code) { | ||
| this.code = code; | ||
| } | ||
|
|
||
| public static BinaryDVCompressionMode fromMode(byte mode) { | ||
| return switch (mode) { | ||
| case 0 -> NO_COMPRESS; | ||
| case 1 -> COMPRESSED_WITH_ZSTD; | ||
| default -> throw new IllegalStateException("unknown compression mode [" + mode + "]"); | ||
| }; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
|
|
||
| package org.elasticsearch.index.codec.tsdb.es819; | ||
|
|
||
| import org.apache.lucene.codecs.CodecUtil; | ||
| import org.apache.lucene.store.ChecksumIndexInput; | ||
| import org.apache.lucene.store.Directory; | ||
| import org.apache.lucene.store.IOContext; | ||
| import org.apache.lucene.store.IndexOutput; | ||
| import org.apache.lucene.util.IOUtils; | ||
| import org.apache.lucene.util.packed.DirectMonotonicWriter; | ||
|
|
||
| import java.io.Closeable; | ||
| import java.io.IOException; | ||
|
|
||
| /** | ||
| * Like OffsetsAccumulator builds offsets and stores in a DirectMonotonicWriter. But write to temp file | ||
| * rather than directly to a DirectMonotonicWriter because the number of values is unknown. If number of | ||
| * values if known prefer OffsetsWriter. | ||
| */ | ||
| final class DelayedOffsetAccumulator implements Closeable { | ||
parkertimmins marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
parkertimmins marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| private final Directory dir; | ||
| private final long startOffset; | ||
|
|
||
| private int numValues = 0; | ||
| private final IndexOutput tempOutput; | ||
| private final String suffix; | ||
|
|
||
| DelayedOffsetAccumulator( | ||
| Directory dir, | ||
| IOContext context, | ||
| IndexOutput data, | ||
| String suffix, | ||
| long startOffset | ||
| ) throws IOException { | ||
| this.dir = dir; | ||
| this.startOffset = startOffset; | ||
| this.suffix = suffix; | ||
|
|
||
| boolean success = false; | ||
| try { | ||
| tempOutput = dir.createTempOutput(data.getName(), suffix, context); | ||
| CodecUtil.writeHeader( | ||
|
||
| tempOutput, | ||
| ES819TSDBDocValuesFormat.META_CODEC + suffix, | ||
| ES819TSDBDocValuesFormat.VERSION_CURRENT | ||
| ); | ||
| success = true; | ||
| } | ||
| finally { | ||
| if (success == false) { | ||
| IOUtils.closeWhileHandlingException(this); // self-close because constructor caller can't | ||
|
||
| } | ||
| } | ||
| } | ||
|
|
||
| public void addDoc(long delta) throws IOException { | ||
| tempOutput.writeVLong(delta); | ||
| numValues++; | ||
| } | ||
|
|
||
| public void build(IndexOutput meta, IndexOutput data) throws IOException { | ||
| CodecUtil.writeFooter(tempOutput); | ||
| IOUtils.close(tempOutput); | ||
|
|
||
| // write the offsets info to the meta file by reading from temp file | ||
| try (ChecksumIndexInput tempInput = dir.openChecksumInput(tempOutput.getName());) { | ||
| CodecUtil.checkHeader( | ||
| tempInput, | ||
| ES819TSDBDocValuesFormat.META_CODEC + suffix, | ||
| ES819TSDBDocValuesFormat.VERSION_CURRENT, | ||
| ES819TSDBDocValuesFormat.VERSION_CURRENT | ||
| ); | ||
| Throwable priorE = null; | ||
| try { | ||
| final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance( | ||
| meta, | ||
| data, | ||
| numValues + 1, | ||
| ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT | ||
| ); | ||
|
|
||
| long offset = startOffset; | ||
| writer.add(offset); | ||
| for (int i = 0; i < numValues; ++i) { | ||
| offset += tempInput.readVLong(); | ||
| writer.add(offset); | ||
| } | ||
| writer.finish(); | ||
| } catch (Throwable e) { | ||
| priorE = e; | ||
| } finally { | ||
| CodecUtil.checkFooter(tempInput, priorE); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public void close() throws IOException { | ||
| if (tempOutput != null) { | ||
| IOUtils.close(tempOutput, () -> dir.deleteFile(tempOutput.getName())); | ||
parkertimmins marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.