Add binary doc value compression with variable doc count blocks #137139
New file: `org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java` (+35 lines)

```java
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.index.codec.tsdb;

import org.apache.lucene.codecs.compressing.CompressionMode;
import org.elasticsearch.index.codec.zstd.ZstdCompressionMode;

public enum BinaryDVCompressionMode {

    NO_COMPRESS((byte) 0, null),
    COMPRESSED_WITH_ZSTD_LEVEL_1((byte) 1, new ZstdCompressionMode(1));

    public final byte code;
    public final CompressionMode compressionMode;

    BinaryDVCompressionMode(byte code, CompressionMode compressionMode) {
        this.code = code;
        this.compressionMode = compressionMode;
    }

    public static BinaryDVCompressionMode fromMode(byte mode) {
        return switch (mode) {
            case 0 -> NO_COMPRESS;
            case 1 -> COMPRESSED_WITH_ZSTD_LEVEL_1;
            default -> throw new IllegalStateException("unknown compression mode [" + mode + "]");
        };
    }
}
```
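The `code` byte and the `fromMode` factory suggest the mode is persisted as a single byte and restored on read. A minimal, hypothetical round-trip sketch is shown below; `meta` and `metaIn` are illustrative stand-ins, not names from this PR:

```java
// Hypothetical sketch: persist the mode as its single-byte code, restore it with fromMode().
// `meta` is an org.apache.lucene.store.IndexOutput, `metaIn` an IndexInput; both are stand-ins.
BinaryDVCompressionMode mode = BinaryDVCompressionMode.COMPRESSED_WITH_ZSTD_LEVEL_1;
meta.writeByte(mode.code);

// ... later, on the read side:
BinaryDVCompressionMode restored = BinaryDVCompressionMode.fromMode(metaIn.readByte());
assert restored == mode;
```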
New file: `org/elasticsearch/index/codec/tsdb/es819/DelayedOffsetAccumulator.java` (+100 lines)

```java
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.index.codec.tsdb.es819;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.elasticsearch.core.IOUtils;

import java.io.Closeable;
import java.io.IOException;

/**
 * Like OffsetsAccumulator builds offsets and stores in a DirectMonotonicWriter. But write to temp file
 * rather than directly to a DirectMonotonicWriter because the number of values is unknown. If number of
 * values is known prefer OffsetsWriter.
 */
final class DelayedOffsetAccumulator implements Closeable {

    private final Directory dir;
    private final long startOffset;

    private int numValues = 0;
    private final IndexOutput tempOutput;
    private final String suffix;

    DelayedOffsetAccumulator(Directory dir, IOContext context, IndexOutput data, String suffix, long startOffset) throws IOException {
        this.dir = dir;
        this.startOffset = startOffset;
        this.suffix = suffix;

        boolean success = false;
        try {
            tempOutput = dir.createTempOutput(data.getName(), suffix, context);
            CodecUtil.writeHeader(tempOutput, ES819TSDBDocValuesFormat.META_CODEC + suffix, ES819TSDBDocValuesFormat.VERSION_CURRENT);
            success = true;
        } finally {
            if (success == false) {
                IOUtils.closeWhileHandlingException(this); // self-close because constructor caller can't
            }
        }
    }

    public void addDoc(long delta) throws IOException {
        tempOutput.writeVLong(delta);
        numValues++;
    }

    public void build(IndexOutput meta, IndexOutput data) throws IOException {
        CodecUtil.writeFooter(tempOutput);
        IOUtils.close(tempOutput);

        // write the offsets info to the meta file by reading from temp file
        try (ChecksumIndexInput tempInput = dir.openChecksumInput(tempOutput.getName())) {
            CodecUtil.checkHeader(
                tempInput,
                ES819TSDBDocValuesFormat.META_CODEC + suffix,
                ES819TSDBDocValuesFormat.VERSION_CURRENT,
                ES819TSDBDocValuesFormat.VERSION_CURRENT
            );
            Throwable priorE = null;
            try {
                final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(
                    meta,
                    data,
                    numValues + 1,
                    ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
                );

                long offset = startOffset;
                writer.add(offset);
                for (int i = 0; i < numValues; ++i) {
                    offset += tempInput.readVLong();
                    writer.add(offset);
                }
                writer.finish();
            } catch (Throwable e) {
                priorE = e;
            } finally {
                CodecUtil.checkFooter(tempInput, priorE);
            }
        }
    }

    @Override
    public void close() throws IOException {
        if (tempOutput != null) {
            IOUtils.close(tempOutput, () -> dir.deleteFile(tempOutput.getName()));
        }
    }
}
```

Inline review comments on this file:
- On `IOUtils.closeWhileHandlingException(this)` in the constructor: this should be tested.
- On the `IOUtils.close(...)` call in `close()`: Just a question: Is it safe to call …
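To make the write path concrete, here is a minimal, hypothetical usage sketch of the accumulator. The `dir`, `data`, `meta`, and `values` variables and the `"-offsets"` suffix are stand-ins, not code from this PR:

```java
// Hypothetical caller: append variable-length values to `data` while recording each length
// as a delta, then materialize the monotonic offsets once the total count is known.
try (DelayedOffsetAccumulator offsets = new DelayedOffsetAccumulator(dir, IOContext.DEFAULT, data, "-offsets", data.getFilePointer())) {
    for (BytesRef value : values) {
        data.writeBytes(value.bytes, value.offset, value.length);
        offsets.addDoc(value.length); // delta = number of bytes just appended
    }
    offsets.build(meta, data); // writes the DirectMonotonicWriter blocks to meta/data
}
```

Note that `build` reads the deltas back from the temp file and `close` deletes that file, so the accumulator should always be closed, as the try-with-resources above does.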
Review comment (Martijn): Maybe make more use of this abstraction here? For example I think we can add methods, e.g. one that returns a `Compressor` instance: for `NO_COMPRESS` this would return null and for the other this would return `ZstdCompressor`.

Review comment (reply): One thing to add to Martijn's comment: if you decide to return a `Compressor`, let's return a `NoOpCompressor` instead of null for when `NO_COMPRESS` mode is used? This way, we don't need to worry about null checks. The `NoOpCompressor` can just be an empty class.
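A hedged sketch of what that suggestion could look like inside `BinaryDVCompressionMode` is shown below. The method name `newCompressor`, the `NoOpCompressor` class, and its pass-through behavior are assumptions for illustration, not code from this PR, and the sketch assumes Lucene's `Compressor` exposes `compress(ByteBuffersDataInput, DataOutput)`:

```java
// Hypothetical sketch of the reviewers' suggestion, not code from this PR.
// Needs: org.apache.lucene.codecs.compressing.Compressor,
//        org.apache.lucene.store.ByteBuffersDataInput, org.apache.lucene.store.DataOutput.
public Compressor newCompressor() {
    // NO_COMPRESS hands out a pass-through compressor so callers never need a null check.
    return compressionMode == null ? new NoOpCompressor() : compressionMode.newCompressor();
}

private static final class NoOpCompressor extends Compressor {
    @Override
    public void compress(ByteBuffersDataInput buffersInput, DataOutput out) throws IOException {
        out.copyBytes(buffersInput, buffersInput.size()); // copy the bytes through unchanged
    }

    @Override
    public void close() {}
}
```

Since `Compressor` has abstract methods, the "empty class" ends up as a pass-through `compress`; whether the `NO_COMPRESS` path should copy bytes like this or simply never invoke a compressor at all is a design choice the PR would still need to settle.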