Skip to content

CSHARP-5603: Add Big Endian support in BinaryVectorReader and BinaryVectorWriter #1682

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

using System;
using System.Buffers.Binary;
using System.Runtime.InteropServices;

namespace MongoDB.Bson.IO
{
Expand All @@ -31,5 +32,55 @@ public static void WriteDoubleLittleEndian(Span<byte> destination, double value)
{
BinaryPrimitives.WriteInt64LittleEndian(destination, BitConverter.DoubleToInt64Bits(value));
}

/// <summary>
/// Reads a single-precision value from the first four bytes of <paramref name="source"/>,
/// interpreting them as little endian.
/// </summary>
/// <param name="source">The bytes to decode; at least 4 bytes are required.</param>
/// <returns>The decoded <see cref="float"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="source"/> is shorter than 4 bytes.</exception>
public static float ReadSingleLittleEndian(ReadOnlySpan<byte> source)
{
    if (source.Length < 4)
    {
        throw new ArgumentOutOfRangeException(nameof(source.Length), "Source span is too small to contain a float.");
    }

#if NET6_0_OR_GREATER
    return BinaryPrimitives.ReadSingleLittleEndian(source);
#else
    // Assemble the raw 32-bit pattern from the little endian bytes, independent of host byte order.
    var bits = BinaryPrimitives.ReadInt32LittleEndian(source);

    // Reinterpret the bits as a float through the overlapping-field struct; this stands in for
    // BitConverter.Int32BitsToSingle on targets such as net472.
    var overlay = new FloatIntUnion { IntValue = bits };
    return overlay.FloatValue;
#endif
}

/// <summary>
/// Writes <paramref name="value"/> into the first four bytes of <paramref name="destination"/>
/// in little endian byte order.
/// </summary>
/// <param name="destination">The span to write into; at least 4 bytes are required.</param>
/// <param name="value">The single-precision value to encode.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="destination"/> is shorter than 4 bytes.</exception>
public static void WriteSingleLittleEndian(Span<byte> destination, float value)
{
    if (destination.Length < 4)
    {
        throw new ArgumentOutOfRangeException(nameof(destination.Length), "Destination span is too small to hold a float.");
    }

#if NET6_0_OR_GREATER
    BinaryPrimitives.WriteSingleLittleEndian(destination, value);
#else
    // Obtain the raw bit pattern through the overlapping-field struct; this stands in for
    // BitConverter.SingleToInt32Bits on targets such as net472.
    var overlay = new FloatIntUnion { FloatValue = value };
    var bits = overlay.IntValue;

    // Emit least significant byte first, independent of host byte order.
    BinaryPrimitives.WriteInt32LittleEndian(destination, bits);
#endif
}

/// <summary>
/// Overlays a <see cref="float"/> and an <see cref="int"/> on the same storage (both fields sit at offset 0),
/// so a value stored through one field can be read back as the bit pattern of the other.
/// </summary>
[StructLayout(LayoutKind.Explicit)]
private struct FloatIntUnion
{
    /// <summary>The 32-bit integer view of the shared storage.</summary>
    [FieldOffset(0)]
    public int IntValue;

    /// <summary>The single-precision view of the shared storage.</summary>
    [FieldOffset(0)]
    public float FloatValue;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice solution!

}
}
42 changes: 27 additions & 15 deletions src/MongoDB.Bson/Serialization/BinaryVectorReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using MongoDB.Bson.IO;

namespace MongoDB.Bson.Serialization
{
Expand All @@ -41,21 +42,8 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType)
switch (vectorDataType)
{
case BinaryVectorDataType.Float32:

if ((vectorDataBytes.Span.Length & 3) != 0)
{
throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes.");
}

if (BitConverter.IsLittleEndian)
{
var singles = MemoryMarshal.Cast<byte, float>(vectorDataBytes.Span);
items = (TItem[])(object)singles.ToArray();
}
else
{
throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet.");
}
var floatArray = ReadSinglesArrayLittleEndian(vectorDataBytes.Span);
items = (TItem[])(object)floatArray;
break;
case BinaryVectorDataType.Int8:
var itemsSpan = MemoryMarshal.Cast<byte, TItem>(vectorDataBytes.Span);
Expand Down Expand Up @@ -123,6 +111,30 @@ TExpectedItem[] AsTypedArrayOrThrow<TExpectedItem>()
return result;
}
}

/// <summary>
/// Decodes a span of little endian bytes into an array of single-precision values, four bytes per element.
/// </summary>
/// <param name="span">The encoded bytes; the length must be a multiple of 4.</param>
/// <returns>A new array with one <see cref="float"/> per 4 input bytes.</returns>
/// <exception cref="FormatException">The length of <paramref name="span"/> is not a multiple of 4.</exception>
private static float[] ReadSinglesArrayLittleEndian(ReadOnlySpan<byte> span)
{
    if (span.Length % 4 != 0)
    {
        throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes.");
    }

    // On a little endian host the in-memory layout already matches the encoding, so the bytes
    // can be reinterpreted and copied in one step.
    if (BitConverter.IsLittleEndian)
    {
        return MemoryMarshal.Cast<byte, float>(span).ToArray();
    }

    // Otherwise decode element by element so each value is assembled from little endian bytes.
    var singles = new float[span.Length / 4];
    for (int index = 0, offset = 0; index < singles.Length; index++, offset += 4)
    {
        singles[index] = BinaryPrimitivesCompat.ReadSingleLittleEndian(span.Slice(offset, 4));
    }

    return singles;
}

public static void ValidateItemType<TItem>(BinaryVectorDataType binaryVectorDataType)
{
Expand Down
37 changes: 31 additions & 6 deletions src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

using System;
using System.Runtime.InteropServices;
using MongoDB.Bson.IO;

namespace MongoDB.Bson.Serialization
{
Expand All @@ -35,15 +36,39 @@ public static byte[] WriteToBytes<TItem>(BinaryVector<TItem> binaryVector)
// Serializes vector items into a byte array made of a 2-byte header (the data type byte, then the
// padding byte) followed by the encoded items.
//   - Float32: each item occupies 4 bytes, written in little endian order.
//   - Int8 / PackedBit: the items are reinterpreted as bytes and appended after the header.
//   - Any other data type: NotSupportedException.
// NOTE(review): this listing appears to interleave the removed and the added lines of a diff hunk
// (for example `if (!BitConverter.IsLittleEndian)` directly before `switch`, and two declarations of
// `result` / `vectorDataBytes`) - confirm against the merged file before relying on statement order.
public static byte[] WriteToBytes<TItem>(ReadOnlySpan<TItem> vectorData, BinaryVectorDataType binaryVectorDataType, byte padding)
where TItem : struct
{
if (!BitConverter.IsLittleEndian)
switch (binaryVectorDataType)
{
throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet.");
}
case BinaryVectorDataType.Float32:
// 4 bytes per float, plus the 2 header bytes.
var length = vectorData.Length * 4;
var result = new byte[2 + length];
result[0] = (byte)binaryVectorDataType;
result[1] = padding;

var floatSpan = MemoryMarshal.Cast<TItem, float>(vectorData);
// Payload region: everything after the 2-byte header.
var floatOutput = result.AsSpan(2);

if (BitConverter.IsLittleEndian)
{
// Host layout already matches the encoding: copy the raw bytes in one step.
MemoryMarshal.Cast<float, byte>(floatSpan).CopyTo(floatOutput);
}
else
{
// Write each float individually so its bytes land in little endian order.
for (int i = 0; i < floatSpan.Length; i++)
{
BinaryPrimitivesCompat.WriteSingleLittleEndian(floatOutput.Slice(i * 4, 4), floatSpan[i]);
}
}

var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData);
byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes];
return result;

return result;
case BinaryVectorDataType.Int8:
case BinaryVectorDataType.PackedBit:
// Items are reinterpreted as bytes and appended after the header unchanged.
var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData);
return [(byte)binaryVectorDataType, padding, .. vectorDataBytes];

default:
throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType}.");
}
}
}
}
89 changes: 89 additions & 0 deletions tests/MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/* Copyright 2010-present MongoDB Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System;
using Xunit;
using FluentAssertions;
using MongoDB.Bson.IO;

namespace MongoDB.Bson.Tests.IO
{
/// <summary>
/// Tests for the single-precision little endian read/write helpers of <see cref="BinaryPrimitivesCompat"/>.
/// </summary>
public class BinaryPrimitivesCompatTests
{
    [Fact]
    public void ReadSingleLittleEndian_should_read_correctly()
    {
        // 1.0f has the bit pattern 0x3F800000; least significant byte comes first.
        byte[] oneLittleEndian = { 0x00, 0x00, 0x80, 0x3F };

        var actual = BinaryPrimitivesCompat.ReadSingleLittleEndian(oneLittleEndian);

        actual.Should().Be(1.0f);
    }

    [Fact]
    public void ReadSingleLittleEndian_should_throw_on_insufficient_length()
    {
        var tooShort = new byte[3];

        var thrown = Record.Exception(() => BinaryPrimitivesCompat.ReadSingleLittleEndian(tooShort));

        var outOfRange = thrown.Should().BeOfType<ArgumentOutOfRangeException>().Subject;
        outOfRange.ParamName.Should().Be("Length");
    }

    [Fact]
    public void WriteSingleLittleEndian_should_throw_on_insufficient_length()
    {
        var tooShort = new byte[3];

        var thrown = Record.Exception(() => BinaryPrimitivesCompat.WriteSingleLittleEndian(tooShort, 1.23f));

        var outOfRange = thrown.Should().BeOfType<ArgumentOutOfRangeException>().Subject;
        outOfRange.ParamName.Should().Be("Length");
    }

    [Fact]
    public void WriteSingleLittleEndian_should_write_correctly()
    {
        var written = new byte[4];

        BinaryPrimitivesCompat.WriteSingleLittleEndian(written, 1.0f);

        // Expected layout of 1.0f (0x3F800000) in little endian order.
        written.Should().Equal(0x00, 0x00, 0x80, 0x3F);
    }

    [Theory]
    [InlineData(0f)]
    [InlineData(1.0f)]
    [InlineData(-1.5f)]
    [InlineData(float.MaxValue)]
    [InlineData(float.MinValue)]
    [InlineData(float.NaN)]
    [InlineData(float.PositiveInfinity)]
    [InlineData(float.NegativeInfinity)]
    public void WriteAndReadSingleLittleEndian_should_roundtrip_correctly(float value)
    {
        var scratch = new byte[4];

        BinaryPrimitivesCompat.WriteSingleLittleEndian(scratch, value);
        var roundTripped = BinaryPrimitivesCompat.ReadSingleLittleEndian(scratch);

        // NaN is checked with IsNaN rather than by comparing the two values.
        if (float.IsNaN(value))
        {
            Assert.True(float.IsNaN(roundTripped));
            return;
        }

        Assert.Equal(value, roundTripped);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -365,10 +365,16 @@ private BsonBinaryData SerializeToBinaryData<TCollection>(TCollection collection
// Builds test input for a binary vector: the elements 0..elementsCount-1 converted to T, and the
// expected serialized bytes (data type byte, padding byte, then the element bytes).
// On a little endian host the element bytes are the elements' in-memory bytes; otherwise they are
// produced by BigEndianToLittleEndian.
// NOTE(review): this listing appears to interleave the removed and the added lines of a diff hunk
// (`elementsSpan` and `vectorBsonData` are each declared twice, and a `return` precedes further
// statements) - confirm against the merged file.
private static (T[], byte[] VectorBson) GetTestData<T>(BinaryVectorDataType dataType, int elementsCount, byte bitsPadding)
where T : struct
{
var elementsSpan = new ReadOnlySpan<T>(Enumerable.Range(0, elementsCount).Select(i => Convert.ChangeType(i, typeof(T)).As<T>()).ToArray());
byte[] vectorBsonData = [(byte)dataType, bitsPadding, .. MemoryMarshal.Cast<T, byte>(elementsSpan)];

return (elementsSpan.ToArray(), vectorBsonData);
var elementsSpan = new ReadOnlySpan<T>(
Enumerable.Range(0, elementsCount)
.Select(i => Convert.ChangeType(i, typeof(T)).As<T>())
.ToArray());
// Pick the byte source by host endianness so the expected payload is little endian either way.
var elementsBytesLittleEndian = BitConverter.IsLittleEndian
? MemoryMarshal.Cast<T, byte>(elementsSpan)
: BigEndianToLittleEndian(elementsSpan, dataType);

// Header (data type, padding) followed by the element bytes.
byte[] vectorBsonData = [(byte)dataType, bitsPadding, .. elementsBytesLittleEndian];
return (elementsSpan.ToArray(), vectorBsonData);
}

private static (BinaryVector<T>, byte[] VectorBson) GetTestDataBinaryVector<T>(BinaryVectorDataType dataType, int elementsCount, byte bitsPadding)
Expand Down Expand Up @@ -409,6 +415,27 @@ private static IBsonSerializer CreateBinaryVectorSerializer<T>(BinaryVectorDataT
return serializer;
}

/// <summary>
/// Returns the bytes of <paramref name="span"/> with the byte order of every element reversed relative to
/// the host's in-memory layout. On a big endian host this yields the little endian encoding.
/// </summary>
/// <param name="span">The elements to convert.</param>
/// <param name="dataType">
/// The vector data type. <c>Int8</c> and <c>PackedBit</c> elements are returned as-is, without reversal.
/// </param>
/// <returns>A new array holding the (possibly byte-swapped) element bytes.</returns>
private static byte[] BigEndianToLittleEndian<T>(ReadOnlySpan<T> span, BinaryVectorDataType dataType) where T : struct
{
    // Start from the elements' in-memory bytes; this avoids per-element boxing and `dynamic` dispatch
    // into BitConverter.GetBytes, which only works for types that happen to have an overload.
    var result = MemoryMarshal.Cast<T, byte>(span).ToArray();

    // These data types are returned without any byte swapping; an empty span has nothing to swap
    // (and must not reach the division below).
    if (dataType == BinaryVectorDataType.Int8 || dataType == BinaryVectorDataType.PackedBit || span.IsEmpty)
    {
        return result;
    }

    // Derive the element size from the reinterpreted length: this is the managed size of T,
    // whereas Marshal.SizeOf<T>() reports the marshaled size, which can differ.
    var elementSize = result.Length / span.Length;

    for (var offset = 0; offset < result.Length; offset += elementSize)
    {
        // Reverse each element's bytes in place.
        Array.Reverse(result, offset, elementSize);
    }

    return result;
}

public class BinaryVectorNoAttributeHolder
{
public BinaryVectorInt8 ValuesInt8 { get; set; }
Expand Down