29 commits
74880a0
Copy binary compression from LUCENE-9211
parkertimmins Oct 23, 2025
a973713
Initial version of blocks with variable number of values
parkertimmins Oct 23, 2025
3fc95dc
Fix issue with index output unclosed
parkertimmins Oct 23, 2025
c302cc2
Changes docRanges to single limit per block, plus start of 0
parkertimmins Oct 23, 2025
99748c8
Factor block address and block doc offset to accumulator class
parkertimmins Oct 23, 2025
fa2ea11
Rename offset accumulator
parkertimmins Oct 24, 2025
b67dd58
Change lz4 to zstd
parkertimmins Oct 24, 2025
638dbbc
Fix direct monotonic reader size
parkertimmins Oct 24, 2025
fdf3428
Fix docRangeLen bug, use for non-logsdb wildcards
parkertimmins Oct 24, 2025
36b3e10
Change offset encoding from zstd to numeric
parkertimmins Oct 24, 2025
eeded36
[CI] Auto commit changes from spotless
Oct 24, 2025
2d8e6dc
Fix missing compression in es819 format
parkertimmins Oct 25, 2025
efa270f
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Oct 25, 2025
c4d67e5
Store offsets rather than lengths
parkertimmins Oct 25, 2025
06a2035
[CI] Auto commit changes from spotless
Oct 25, 2025
7ccb18d
Remove forbidden APIs
parkertimmins Oct 25, 2025
a57e0d4
[CI] Auto commit changes from spotless
Oct 25, 2025
f156e55
Binary search to find block containing docNum
parkertimmins Oct 27, 2025
91e5842
[CI] Auto commit changes from spotless
Oct 27, 2025
401a041
do not mmap temp offset files
parkertimmins Oct 27, 2025
ad55bc3
feedback
parkertimmins Oct 27, 2025
4d4e153
[CI] Auto commit changes from spotless
Oct 27, 2025
f1ff182
Move zstd (de)compressor to separate class
parkertimmins Oct 27, 2025
9d2f237
Combine doAddCompressedBinary and doAddUncompressedBinary
parkertimmins Oct 27, 2025
2269f9c
[CI] Auto commit changes from spotless
Oct 27, 2025
1c4e9dc
feedback
parkertimmins Oct 28, 2025
3ddb649
Add WildcardRollingUpgradeIT
parkertimmins Oct 28, 2025
dbcd1c6
need new compressor/decompressor for new block writer
parkertimmins Oct 29, 2025
5537d8c
[CI] Auto commit changes from spotless
Oct 29, 2025
Changed file (JMH benchmark):
@@ -27,6 +27,7 @@
 import org.elasticsearch.cluster.metadata.DataStream;
 import org.elasticsearch.common.logging.LogConfigurator;
 import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec;
+import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
 import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -257,7 +258,12 @@ private static IndexWriterConfig createIndexWriterConfig(boolean optimizedMergeEnabled)
         );
         config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER);
         config.setMergePolicy(new LogByteSizeMergePolicy());
-        var docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled);
+        var docValuesFormat = new ES819TSDBDocValuesFormat(
+            4096,
+            512,
+            optimizedMergeEnabled,
+            BinaryDVCompressionMode.COMPRESSED_WITH_ZSTD_LEVEL_1
+        );
         config.setCodec(new Elasticsearch92Lucene103Codec() {
             @Override
             public DocValuesFormat getDocValuesFormatForField(String field) {
BinaryDVCompressionMode.java (new file)
@@ -0,0 +1,35 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.tsdb;

import org.apache.lucene.codecs.compressing.CompressionMode;
import org.elasticsearch.index.codec.zstd.ZstdCompressionMode;

public enum BinaryDVCompressionMode {
Review comment (Member):
Maybe make more use of this abstraction here? For example I think we can add methods:

  • To get the Compressor instance. For NO_COMPRESS this would return null and for the other this would return ZstdCompressor.
  • Add minBlockBytes() that returns CompressedBinaryBlockWriter#MIN_BLOCK_BYTES or -1 (for uncompressed).
  • And perhaps add a level() method that returns -1 or CompressedBinaryBlockWriter#ZSTD_LEVEL? But maybe this isn't necessary.

Review comment (Contributor @Kubik42, Oct 27, 2025):
One thing to add to Martijn's comment:

If you decide to return a Compressor, let's return a NoOpCompressor instead of null when NO_COMPRESS mode is used. This way, we don't need to worry about null checks. The NoOpCompressor can just be an empty class.


    NO_COMPRESS((byte) 0, null),
    COMPRESSED_WITH_ZSTD_LEVEL_1((byte) 1, new ZstdCompressionMode(1));

    public final byte code;
    public final CompressionMode compressionMode;

    BinaryDVCompressionMode(byte code, CompressionMode compressionMode) {
        this.code = code;
        this.compressionMode = compressionMode;
    }

    public static BinaryDVCompressionMode fromMode(byte mode) {
        return switch (mode) {
            case 0 -> NO_COMPRESS;
            case 1 -> COMPRESSED_WITH_ZSTD_LEVEL_1;
            default -> throw new IllegalStateException("unknown compression mode [" + mode + "]");
        };
    }
}
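
Following the review thread above, a minimal sketch of what the suggested abstraction could look like. This is not code from the PR: compressor(), minBlockBytes(), and NoOpCompressor are hypothetical names taken from the review discussion, and CompressedBinaryBlockWriter.MIN_BLOCK_BYTES is assumed to exist as the reviewers describe.

    // Hypothetical additions to BinaryDVCompressionMode (illustrative only):
    // callers obtain a Compressor from the mode itself and never branch on null.
    public Compressor compressor() {
        return switch (this) {
            case NO_COMPRESS -> new NoOpCompressor(); // pass-through, avoids null checks
            case COMPRESSED_WITH_ZSTD_LEVEL_1 -> compressionMode.newCompressor();
        };
    }

    public int minBlockBytes() {
        // -1 signals "no blocking" for the uncompressed mode, per the review suggestion
        return this == NO_COMPRESS ? -1 : CompressedBinaryBlockWriter.MIN_BLOCK_BYTES;
    }

A hypothetical NoOpCompressor would only need to copy bytes through unchanged:

import org.apache.lucene.codecs.compressing.Compressor;
import org.apache.lucene.store.ByteBuffersDataInput;
import org.apache.lucene.store.DataOutput;

import java.io.IOException;

/** Hypothetical pass-through compressor for NO_COMPRESS (not part of this PR). */
final class NoOpCompressor extends Compressor {
    @Override
    public void compress(ByteBuffersDataInput buffersInput, DataOutput out) throws IOException {
        out.copyBytes(buffersInput, buffersInput.length()); // write input bytes unchanged
    }

    @Override
    public void close() {}
}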
DelayedOffsetAccumulator.java (new file)
@@ -0,0 +1,100 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.tsdb.es819;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.elasticsearch.core.IOUtils;

import java.io.Closeable;
import java.io.IOException;

/**
 * Like OffsetsAccumulator, builds offsets and stores them in a DirectMonotonicWriter, but writes to a
 * temp file rather than directly to the DirectMonotonicWriter because the number of values is not known
 * up front. If the number of values is known, prefer OffsetsWriter.
 */
final class DelayedOffsetAccumulator implements Closeable {
    private final Directory dir;
    private final long startOffset;

    private int numValues = 0;
    private final IndexOutput tempOutput;
    private final String suffix;

    DelayedOffsetAccumulator(Directory dir, IOContext context, IndexOutput data, String suffix, long startOffset) throws IOException {
        this.dir = dir;
        this.startOffset = startOffset;
        this.suffix = suffix;

        boolean success = false;
        try {
            tempOutput = dir.createTempOutput(data.getName(), suffix, context);
            CodecUtil.writeHeader(tempOutput, ES819TSDBDocValuesFormat.META_CODEC + suffix, ES819TSDBDocValuesFormat.VERSION_CURRENT);
            success = true;
        } finally {
            if (success == false) {
                IOUtils.closeWhileHandlingException(this); // self-close because constructor caller can't
Review comment (Contributor): this should be tested.
            }
        }
    }

    public void addDoc(long delta) throws IOException {
        tempOutput.writeVLong(delta);
        numValues++;
    }

    public void build(IndexOutput meta, IndexOutput data) throws IOException {
        CodecUtil.writeFooter(tempOutput);
        IOUtils.close(tempOutput);

        // write the offsets info to the meta file by reading from temp file
        try (ChecksumIndexInput tempInput = dir.openChecksumInput(tempOutput.getName())) {
            CodecUtil.checkHeader(
                tempInput,
                ES819TSDBDocValuesFormat.META_CODEC + suffix,
                ES819TSDBDocValuesFormat.VERSION_CURRENT,
                ES819TSDBDocValuesFormat.VERSION_CURRENT
            );
            Throwable priorE = null;
            try {
                final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(
                    meta,
                    data,
                    numValues + 1,
                    ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
                );

                long offset = startOffset;
                writer.add(offset);
                for (int i = 0; i < numValues; ++i) {
                    offset += tempInput.readVLong();
                    writer.add(offset);
                }
                writer.finish();
            } catch (Throwable e) {
                priorE = e;
            } finally {
                CodecUtil.checkFooter(tempInput, priorE);
            }
        }
    }

    @Override
    public void close() throws IOException {
        if (tempOutput != null) {
            IOUtils.close(tempOutput, () -> dir.deleteFile(tempOutput.getName()));
Review comment (Contributor): Just a question: Is it safe to call close() here if the underlying resource is already closed?
        }
    }
}
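
For orientation, a hedged sketch of how a doc-values writer might drive this accumulator. The dir, meta, and data outputs and the values iterable are illustrative assumptions, not taken from this PR:

// Hypothetical caller (illustrative only): each value's length is appended
// as a delta while the total count is still unknown; build() then reads the
// temp file back and writes numValues + 1 monotonic offsets to meta/data.
try (DelayedOffsetAccumulator offsets =
         new DelayedOffsetAccumulator(dir, IOContext.DEFAULT, data, "offsets", data.getFilePointer())) {
    for (BytesRef value : values) {
        data.writeBytes(value.bytes, value.offset, value.length);
        offsets.addDoc(value.length); // delta = length of the value just written
    }
    offsets.build(meta, data);        // materializes the offsets
}                                     // close() deletes the temp file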