Skip to content

Optimize BSON decoding #1667

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bson/src/main/org/bson/ByteBuf.java
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ public interface ByteBuf {
* @return {@code true} if, and only if, this buffer is backed by an array and is not read-only
* @since 5.5
*/
boolean hasArray();
boolean isBackedByArray();

/**
* Returns the offset of the first byte within the backing byte array of
Expand Down
2 changes: 1 addition & 1 deletion bson/src/main/org/bson/ByteBufNIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ public byte[] array() {
}

@Override
public boolean hasArray() {
public boolean isBackedByArray() {
return buf.hasArray();
}

Expand Down
52 changes: 52 additions & 0 deletions bson/src/main/org/bson/internal/PlatformUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright 2008-present MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.bson.internal;

/**
* Utility class for platform-specific operations.
* This class is not part of the public API and may be removed or changed at any time.
*/
public final class PlatformUtil {

private PlatformUtil() {}

// These architectures support unaligned memory access.
// While others might as well, it's safer to assume they don't.
private static final String[] ARCHITECTURES_ALLOWING_UNALIGNED_ACCESS = {
"x86",
"amd64",
"i386",
"x86_64",
"arm64", // evergreen dbx-perf-distro uses this architecture
"aarch64"};

public static boolean isUnalignedAccessAllowed() {
try {
String processArch = System.getProperty("os.arch");
for (String supportedArch : ARCHITECTURES_ALLOWING_UNALIGNED_ACCESS) {
if (supportedArch.equals(processArch)) {
return true;
}
}
return false;
} catch (Exception e) {
// Ignore security exception and proceed with default value
return false;
}
}
}

88 changes: 71 additions & 17 deletions bson/src/main/org/bson/io/ByteBufferBsonInput.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.nio.charset.StandardCharsets;

import static java.lang.String.format;
import static org.bson.internal.PlatformUtil.isUnalignedAccessAllowed;

/**
* An implementation of {@code BsonInput} that is backed by a {@code ByteBuf}.
Expand All @@ -33,6 +34,14 @@
public class ByteBufferBsonInput implements BsonInput {

private static final String[] ONE_BYTE_ASCII_STRINGS = new String[Byte.MAX_VALUE + 1];
private static final boolean UNALIGNED_ACCESS_SUPPORTED = isUnalignedAccessAllowed();
/* A dynamically sized scratch buffer, that is reused across BSON String reads:
* 1. Reduces garbage collection by avoiding new byte array creation.
* 2. Improves cache utilization through temporal locality.
* 3. Avoids JVM allocation and zeroing cost for new memory allocations.
*/
private byte[] scratchBuffer;


static {
for (int b = 0; b < ONE_BYTE_ASCII_STRINGS.length; b++) {
Expand Down Expand Up @@ -127,15 +136,12 @@ public String readString() {

@Override
public String readCString() {
int mark = buffer.position();
skipCString();
int size = buffer.position() - mark;
buffer.position(mark);
int size = computeCStringLength(buffer.position());
return readString(size);
}

private String readString(final int size) {
if (size == 2) {
private String readString(final int bsonStringSize) {
if (bsonStringSize == 2) {
byte asciiByte = buffer.get(); // if only one byte in the string, it must be ascii.
byte nullByte = buffer.get(); // read null terminator
if (nullByte != 0) {
Expand All @@ -146,26 +152,74 @@ private String readString(final int size) {
}
return ONE_BYTE_ASCII_STRINGS[asciiByte]; // this will throw if asciiByte is negative
} else {
byte[] bytes = new byte[size - 1];
buffer.get(bytes);
byte nullByte = buffer.get();
if (nullByte != 0) {
throw new BsonSerializationException("Found a BSON string that is not null-terminated");
if (buffer.isBackedByArray()) {
int position = buffer.position();
int arrayOffset = buffer.arrayOffset();
int newPosition = position + bsonStringSize;
buffer.position(newPosition);

byte[] array = buffer.array();
if (array[arrayOffset + newPosition - 1] != 0) {
throw new BsonSerializationException("Found a BSON string that is not null-terminated");
}
return new String(array, arrayOffset + position, bsonStringSize - 1, StandardCharsets.UTF_8);
} else if (scratchBuffer == null || bsonStringSize > scratchBuffer.length) {
int scratchBufferSize = bsonStringSize + (bsonStringSize >>> 1); //1.5 times the size
scratchBuffer = new byte[scratchBufferSize];
}
return new String(bytes, StandardCharsets.UTF_8);

buffer.get(scratchBuffer, 0, bsonStringSize);
if (scratchBuffer[bsonStringSize - 1] != 0) {
throw new BsonSerializationException("BSON string not null-terminated");
}
return new String(scratchBuffer, 0, bsonStringSize - 1, StandardCharsets.UTF_8);
}
}

@Override
public void skipCString() {
ensureOpen();
boolean checkNext = true;
while (checkNext) {
if (!buffer.hasRemaining()) {
throw new BsonSerializationException("Found a BSON string that is not null-terminated");
int pos = buffer.position();
int length = computeCStringLength(pos);
buffer.position(pos + length);
}

/*
This method uses the SWAR (SIMD Within A Register) technique when aligned access is supported.
SWAR finds a null terminator by processing 8 bytes at once.
*/
public int computeCStringLength(final int prevPos) {
ensureOpen();
int pos = buffer.position();
int limit = buffer.limit();

if (UNALIGNED_ACCESS_SUPPORTED) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So if we did this for all platforms, it would be slower on the ones that don't allow unaligned access? Slower than just going byte by byte? Just wondering if it's worth it to have two code paths to maintain.

I also don't see a test for when this value is false, since we don't run on any platforms that would make it so. It's a bit concerning that we don't, even though by inspection it seems obvious, at least with the code as it is, that it's correct. If we did want to add a test, we would have to add a testing backdoor to PlatformUtil to override the default behavior of examining "os.arch"

Copy link
Member Author

@vbabanin vbabanin Apr 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There might be some performance penalty, as ByteBuffer uses Unsafe.getLongUnaligned, which reads bytes individually and composes a long on architectures that do not support unaligned access, potentially adding some overhead.

Nearly all modern cloud providers provide architectures that support unaligned access. The ones that don’t are typically limited to embedded systems or legacy hardware. Given how rare such platforms are, I’m actually in favor of removing the platform check altogether - I think the likelihood of hitting such an architecture is extremely low. @jyemin @NathanQingyangXu What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am ok with this. Keeping expanding the CPU list in the long future doesn't make much sense to me.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, let's remove the platform check.

int chunks = (limit - pos) >>> 3;
// Process 8 bytes at a time.
for (int i = 0; i < chunks; i++) {
long word = buffer.getLong(pos);
long mask = word - 0x0101010101010101L;
mask &= ~word;
mask &= 0x8080808080808080L;
if (mask != 0) {
// first null terminator found in the Little Endian long
int offset = Long.numberOfTrailingZeros(mask) >>> 3;
// Found the null at pos + offset; reset buffer's position.
return (pos - prevPos) + offset + 1;
}
pos += 8;
}
checkNext = buffer.get() != 0;
}

// Process remaining bytes one-by-one.
while (pos < limit) {
if (buffer.get(pos++) == 0) {
return (pos - prevPos);
}
}

buffer.position(pos);
throw new BsonSerializationException("Found a BSON string that is not null-terminated");
}

@Override
Expand Down
71 changes: 71 additions & 0 deletions bson/src/test/unit/org/bson/internal/PlatformUtilTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Copyright 2008-present MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.bson.internal;

import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

class PlatformUtilTest {

@ParameterizedTest
@ValueSource(strings = {"arm", "ppc", "ppc64", "sparc", "mips"})
@DisplayName("Should not allow unaligned access for unsupported architectures")
void shouldNotAllowUnalignedAccessForUnsupportedArchitecture(final String architecture) {
withSystemProperty("os.arch", architecture, () -> {
boolean result = PlatformUtil.isUnalignedAccessAllowed();
assertFalse(result);
});
}

@Test
@DisplayName("Should not allow unaligned access when system property is undefined")
void shouldNotAllowUnalignedAccessWhenSystemPropertyIsUndefined() {
withSystemProperty("os.arch", null, () -> {
boolean result = PlatformUtil.isUnalignedAccessAllowed();
assertFalse(result);
});
}

@ParameterizedTest
@ValueSource(strings = {"x86", "amd64", "i386", "x86_64", "arm64", "aarch64"})
@DisplayName("Should allow unaligned access for supported architectures")
void shouldAllowUnalignedAccess(final String architecture) {
withSystemProperty("os.arch", architecture, () -> {
boolean result = PlatformUtil.isUnalignedAccessAllowed();
assertTrue(result);
});
}

public static void withSystemProperty(final String name, final String value, final Runnable testCode) {
String original = System.getProperty(name);
if (value == null) {
System.clearProperty(name);
} else {
System.setProperty(name, value);
}
try {
testCode.run();
} finally {
System.setProperty(name, original);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ private ByteBuf getCurrentByteBuffer() {
if (currentByteBuffer == null) {
currentByteBuffer = getByteBufferAtIndex(curBufferIndex);
}

if (currentByteBuffer.hasRemaining()) {
return currentByteBuffer;
}
Expand All @@ -185,11 +186,6 @@ private ByteBuf getCurrentByteBuffer() {
return currentByteBuffer;
}

private ByteBuf getNextByteBuffer() {
assertFalse(bufferList.get(curBufferIndex).hasRemaining());
return getByteBufferAtIndex(++curBufferIndex);
}

private ByteBuf getByteBufferAtIndex(final int index) {
if (bufferList.size() < index + 1) {
ByteBuf buffer = bufferProvider.getBuffer(index >= (MAX_SHIFT - INITIAL_SHIFT)
Expand Down Expand Up @@ -404,7 +400,7 @@ protected int writeCharacters(final String str, final boolean checkNullTerminati
int curBufferLimit = curBuffer.limit();
int remaining = curBufferLimit - curBufferPos;

if (curBuffer.hasArray()) {
if (curBuffer.isBackedByArray()) {
byte[] dst = curBuffer.array();
int arrayOffset = curBuffer.arrayOffset();
if (remaining >= str.length() + 1) {
Expand Down Expand Up @@ -459,7 +455,7 @@ private int writeOnBuffers(final String str,

if (c < 0x80) {
if (remaining == 0) {
curBuffer = getNextByteBuffer();
curBuffer = getCurrentByteBuffer();
curBufferPos = 0;
curBufferLimit = curBuffer.limit();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ public byte[] array() {
}

@Override
public boolean hasArray() {
public boolean isBackedByArray() {
return false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,15 @@ <T extends BsonDocument> T getResponseDocument(final int messageId, final Decode
}

/**
* Returns a read-only buffer containing the response body. Care should be taken to not use the returned buffer after this instance has
* Returns a buffer containing the response body. Care should be taken to not use the returned buffer after this instance has
* been closed.
*
* @return a read-only buffer containing the response body
* NOTE: do not modify this buffer, it is being made writable for performance reasons to avoid redundant copying.
*
* @return a buffer containing the response body
*/
public ByteBuf getBodyByteBuffer() {
return bodyByteBuffer.asReadOnly();
return bodyByteBuffer;
}

public void reset() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ public byte[] array() {
}

@Override
public boolean hasArray() {
public boolean isBackedByArray() {
return proxied.hasArray();
}

Expand Down
Loading