-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Add Big Endian Support for Float32 in BinaryVectorWriter.WriteToBytes<T>() #1682
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
ff89368
2c2cae1
2c4a16a
0ee1694
f43d935
92b7ed2
530ecda
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
*/ | ||
|
||
using System; | ||
using System.Buffers.Binary; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Runtime.InteropServices; | ||
|
@@ -26,7 +27,6 @@ public static BinaryVector<TItem> ReadBinaryVector<TItem>(ReadOnlyMemory<byte> v | |
where TItem : struct | ||
{ | ||
var (items, padding, vectorDataType) = ReadBinaryVectorAsArray<TItem>(vectorData); | ||
|
||
return CreateBinaryVector(items, padding, vectorDataType); | ||
} | ||
|
||
|
@@ -41,29 +41,39 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType) | |
switch (vectorDataType) | ||
{ | ||
case BinaryVectorDataType.Float32: | ||
|
||
if ((vectorDataBytes.Span.Length & 3) != 0) | ||
{ | ||
throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes."); | ||
} | ||
|
||
if (BitConverter.IsLittleEndian) | ||
if (typeof(TItem) != typeof(float)) | ||
{ | ||
var singles = MemoryMarshal.Cast<byte, float>(vectorDataBytes.Span); | ||
items = (TItem[])(object)singles.ToArray(); | ||
throw new NotSupportedException($"Expected float for Float32 vector type, but found {typeof(TItem)}."); | ||
} | ||
else | ||
|
||
int count = vectorDataBytes.Length / 4; // 4 bytes per float | ||
float[] floatArray = new float[count]; | ||
|
||
for (int i = 0; i < count; i++) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Few thoughts here:
|
||
{ | ||
throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet."); | ||
// Each float32 is 4 bytes. So to extract the i-th float, we slice 4 bytes from offset i * 4. Use little-endian or big-endian decoding based on platform. | ||
floatArray[i] = BitConverter.IsLittleEndian | ||
? MemoryMarshal.Read<float>(vectorDataBytes.Span.Slice(i * 4, 4)) // fast, unaligned read on little endian | ||
: BinaryPrimitives.ReadSingleBigEndian(vectorDataBytes.Span.Slice(i * 4, 4)); // correctly reassemble 4 bytes as big-endian float | ||
BorisDog marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need to read little endian here. BSON is always little endian, so we need to swap the bytes when reading on big endian. Same logic applies to Write as well. In any case, we need to validate this solution with a real server, for example comparing the binary vector values read from Atlas deployment and the actual values. |
||
} | ||
|
||
items = (TItem[])(object)floatArray; | ||
break; | ||
|
||
BorisDog marked this conversation as resolved.
Show resolved
Hide resolved
|
||
case BinaryVectorDataType.Int8: | ||
var itemsSpan = MemoryMarshal.Cast<byte, TItem>(vectorDataBytes.Span); | ||
items = (TItem[])(object)itemsSpan.ToArray(); | ||
items = itemsSpan.ToArray(); | ||
break; | ||
|
||
case BinaryVectorDataType.PackedBit: | ||
items = (TItem[])(object)vectorDataBytes.ToArray(); | ||
break; | ||
|
||
default: | ||
throw new NotSupportedException($"Binary vector data type {vectorDataType} is not supported."); | ||
} | ||
|
@@ -149,3 +159,4 @@ private static void ValidateItemTypeForBinaryVector<TItem, TItemExpectedType, TB | |
} | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
*/ | ||
|
||
using System; | ||
using System.Buffers.Binary; | ||
using System.Runtime.InteropServices; | ||
|
||
namespace MongoDB.Bson.Serialization | ||
|
@@ -35,15 +36,44 @@ public static byte[] WriteToBytes<TItem>(BinaryVector<TItem> binaryVector) | |
public static byte[] WriteToBytes<TItem>(ReadOnlySpan<TItem> vectorData, BinaryVectorDataType binaryVectorDataType, byte padding) | ||
where TItem : struct | ||
{ | ||
if (!BitConverter.IsLittleEndian) | ||
if (BitConverter.IsLittleEndian) | ||
{ | ||
throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet."); | ||
var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData); | ||
byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes]; | ||
return result; | ||
} | ||
|
||
var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData); | ||
byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes]; | ||
byte[] resultBytes; | ||
switch (binaryVectorDataType) | ||
{ | ||
case BinaryVectorDataType.Float32: | ||
int length = vectorData.Length * sizeof(float); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. minor: Prefer |
||
resultBytes = new byte[2 + length]; // Allocate output buffer: | ||
resultBytes[0] = (byte)binaryVectorDataType; // - [0]: vector type | ||
resultBytes[1] = padding; // - [1]: padding | ||
var floatSpan = MemoryMarshal.Cast<TItem, float>(vectorData); | ||
Span<byte> floatOutput = resultBytes.AsSpan(2); // - [2...]: actual float data , skipping header | ||
foreach (var value in floatSpan) | ||
{ | ||
// Each float is 4 bytes - write in Big Endian format | ||
BinaryPrimitives.WriteSingleBigEndian(floatOutput, value); | ||
BorisDog marked this conversation as resolved.
Show resolved
Hide resolved
|
||
floatOutput = floatOutput.Slice(4); // advance to next 4-byte block | ||
} | ||
return resultBytes; | ||
|
||
case BinaryVectorDataType.Int8: | ||
case BinaryVectorDataType.PackedBit: | ||
var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData); | ||
resultBytes = new byte[2 + vectorDataBytes.Length]; | ||
resultBytes[0] = (byte)binaryVectorDataType; | ||
resultBytes[1] = padding; | ||
vectorDataBytes.CopyTo(resultBytes.AsSpan(2)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can the code that handles LittleEndian case be reused here? |
||
return resultBytes; | ||
|
||
return result; | ||
default: | ||
throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType} on Big Endian architecture yet."); | ||
} | ||
} | ||
} | ||
} | ||
|
Uh oh!
There was an error while loading. Please reload this page.