From 402a276ea668e199089315137a8bbdb04d48a1c8 Mon Sep 17 00:00:00 2001 From: rustagir Date: Fri, 11 Apr 2025 11:54:39 -0400 Subject: [PATCH 1/4] DOCSP-40227: memory serialization --- source/fundamentals/atlas-vector-search.txt | 6 +- source/fundamentals/serialization.txt | 65 +++++++++++++++++++ .../code-examples/MemorySerialization.cs | 38 +++++++++++ source/whats-new.txt | 3 +- 4 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 source/includes/fundamentals/code-examples/MemorySerialization.cs diff --git a/source/fundamentals/atlas-vector-search.txt b/source/fundamentals/atlas-vector-search.txt index ae3204e7..d6390eda 100644 --- a/source/fundamentals/atlas-vector-search.txt +++ b/source/fundamentals/atlas-vector-search.txt @@ -38,6 +38,8 @@ To learn more about {+vector-search+}, see the :atlas:`{+vector-search+} ` guide in the MongoDB Atlas documentation. +.. _csharp-supported-vector-types: + Supported Vector Embedding Types -------------------------------- @@ -50,7 +52,6 @@ search and retrieval. The {+driver-short+} supports vector embeddings of several types. The following sections describe the supported vector embedding types. - .. 
_csharp-vector-array-representation: Array Representations @@ -190,4 +191,5 @@ guide, see the following API Documentation: - `BinaryVectorFloat32 <{+new-api-root+}/MongoDB.Bson/MongoDB.Bson.BinaryVectorPackedBit.html>`__ - `ToQueryVector() <{+new-api-root+}/MongoDB.Driver/MongoDB.Driver.BinaryVectorDriverExtensions.ToQueryVector.html>`__ - `VectorSearch() <{+new-api-root+}/MongoDB.Driver/MongoDB.Driver.AggregateFluentBase-1.VectorSearch.html>`__ -- `Aggregate() <{+new-api-root+}/MongoDB.Driver/MongoDB.Driver.IMongoCollectionExtensions.Aggregate.html>`__ \ No newline at end of file +- `Aggregate() + <{+new-api-root+}/MongoDB.Driver/MongoDB.Driver.IMongoCollectionExtensions.Aggregate.html>`__ diff --git a/source/fundamentals/serialization.txt b/source/fundamentals/serialization.txt index ab0af251..44bf3542 100644 --- a/source/fundamentals/serialization.txt +++ b/source/fundamentals/serialization.txt @@ -200,6 +200,71 @@ specified conventions, then passing it to the var camelCaseConvention = new ConventionPack { new CamelCaseElementNameConvention() }; ConventionRegistry.Register("CamelCaseConvention", camelCaseConvention, t => true); +.. _csharp-array-serialization: + +Improve Array Serialization Performance +--------------------------------------- + +You can improve your application's performance by representing +arrays of primitives as `Memory `__ +and `ReadOnlyMemory `__ +structs instead of by using types such as standard {+language+} arrays or +``BsonArray``. The driver implements fast serialization and +deserialization paths for ``Memory`` and ``ReadOnlyMemory``, which +enhances speed and reduces memory usage. + +.. note:: + + Truncation and overflow checks are not supported for ``Memory`` or + ``ReadOnlyMemory``, but these checks are implemented for standard + arrays. 
+ +You can effect these performance improvements by storing the following +primitive types in ``Memory`` or ``ReadOnlyMemory`` objects: + +- ``bool`` +- ``sbyte`` +- ``byte`` +- ``char`` +- ``short`` +- ``ushort`` +- ``int`` +- ``uint`` +- ``long`` +- ``ulong`` +- ``float`` +- ``double`` +- ``decimal`` + +The following example defines a ``Line`` POCO that contains array fields +modeled by ``Memory`` and ``ReadOnlyMemory`` structs: + +.. literalinclude:: /includes/fundamentals/code-examples/MemorySerialization.cs + :start-after: start-line-class + :end-before: end-line-class + :language: csharp + :dedent: + +The following document shows how a sample ``Line`` object is +represented in MongoDB: + +.. code-block:: json + + { + "_id": ..., + "X": [ 1, 2, 3, 4, 5 ], + "Y": [ 1, 1.409999966621399, 1.7300000190734863, 2, 2.240000009536743 ] + } + +.. tip:: Model Vectors + + :ref:`csharp-atlas-vector-search` involves creating and querying + large numerical arrays. If your application uses + {+vector-search+}, you might benefit from the performance + improvements from using ``Memory`` and ``ReadOnlyMemory`` to model + vector data. To learn more, see :ref:`csharp-supported-vector-types` + in the {+vector-search+} guide. 
+ Additional Information ---------------------- diff --git a/source/includes/fundamentals/code-examples/MemorySerialization.cs b/source/includes/fundamentals/code-examples/MemorySerialization.cs new file mode 100644 index 00000000..5ad044ea --- /dev/null +++ b/source/includes/fundamentals/code-examples/MemorySerialization.cs @@ -0,0 +1,38 @@ +using MongoDB.Bson; +using MongoDB.Bson.Serialization.Conventions; +using MongoDB.Driver; + +public class Program +{ + + public static void Main(string[] args) + { + // Replace with your connection string + const string uri = ""; + + var mongoClient = new MongoClient(uri); + var database = mongoClient.GetDatabase("db"); + var _collection = database.GetCollection<Line>("lines"); + + var line = new Line + { + X = new Memory<int>(new[] { 1, 2, 3, 4, 5 }), + Y = new ReadOnlyMemory<float>(new[] { 1f, 1.41f, 1.73f, 2f, 2.24f }) + }; + + var filter = Builders<Line>.Filter.Empty; + + var result = _collection.Find(filter).FirstOrDefault().ToJson(); + Console.WriteLine(result); + } + +} + +// start-line-class +public class Line +{ + public ObjectId Id { get; set; } + public Memory<int> X { get; set; } + public ReadOnlyMemory<float> Y { get; set; } +} +// end-line-class diff --git a/source/whats-new.txt b/source/whats-new.txt index bb5ff603..ed763541 100644 --- a/source/whats-new.txt +++ b/source/whats-new.txt @@ -392,7 +392,8 @@ The 2.26 driver release includes the following new features: - Enabled use of native ``crypto`` in ``libmongocrypt`` bindings. - Added support for serialization of ``Memory`` and ``ReadOnlyMemory`` - structs. + structs. To learn more about implementing these types, see the + :ref:`csharp-array-serialization` section of the Serialization guide. - Added support for the GCP Identity Provider when using the ``MONGODB-OIDC`` authentication mechanism. 
To learn more, see From 818651a18268b138fc3c3a6b64f5c83e698b3bb1 Mon Sep 17 00:00:00 2001 From: rustagir Date: Fri, 11 Apr 2025 11:57:13 -0400 Subject: [PATCH 2/4] add note --- source/fundamentals/atlas-vector-search.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/source/fundamentals/atlas-vector-search.txt b/source/fundamentals/atlas-vector-search.txt index d6390eda..f99da565 100644 --- a/source/fundamentals/atlas-vector-search.txt +++ b/source/fundamentals/atlas-vector-search.txt @@ -72,6 +72,12 @@ The following example shows a class with properties of the preceding types: :start-after: start-bson-arrays :end-before: end-bson-arrays +.. tip:: + + To learn more about using the ``Memory`` and ``ReadOnlyMemory`` + types, see the :ref:`csharp-array-serialization` section of the + Serialization guide. + .. _csharp-binary-vector-representation: Binary Vector Representations From 34a07e3c9b183f59b290d628bd322807d86381f1 Mon Sep 17 00:00:00 2001 From: rustagir Date: Fri, 11 Apr 2025 13:54:05 -0400 Subject: [PATCH 3/4] MM PR fixes 1 --- source/fundamentals/serialization.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/fundamentals/serialization.txt b/source/fundamentals/serialization.txt index 44bf3542..c3b3f006 100644 --- a/source/fundamentals/serialization.txt +++ b/source/fundamentals/serialization.txt @@ -209,7 +209,7 @@ You can improve your application's performance by representing arrays of primitives as `Memory `__ and `ReadOnlyMemory `__ structs instead of by using types such as standard {+language+} arrays or -``BsonArray``. The driver implements fast serialization and +``BsonArray`` objects. The driver implements fast serialization and deserialization paths for ``Memory`` and ``ReadOnlyMemory``, which enhances speed and reduces memory usage. 
From 57a41f5aed4ea1c41fbfa11144c92d9c70d87d0b Mon Sep 17 00:00:00 2001 From: rustagir Date: Mon, 14 Apr 2025 09:18:15 -0400 Subject: [PATCH 4/4] BD tech review comments 1 --- source/fundamentals/serialization.txt | 8 ++++---- .../fundamentals/code-examples/MemorySerialization.cs | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/source/fundamentals/serialization.txt b/source/fundamentals/serialization.txt index c3b3f006..1440b35d 100644 --- a/source/fundamentals/serialization.txt +++ b/source/fundamentals/serialization.txt @@ -220,7 +220,7 @@ enhances speed and reduces memory usage. arrays. You can effect these performance improvements by storing the following -primitive types in ``Memory`` or ``ReadOnlyMemory`` objects: +primitive types in ``Memory`` or ``ReadOnlyMemory`` structs: - ``bool`` - ``sbyte`` @@ -261,9 +261,9 @@ represented in MongoDB: :ref:`csharp-atlas-vector-search` involves creating and querying large numerical arrays. If your application uses {+vector-search+}, you might benefit from the performance - improvements from using ``Memory`` and ``ReadOnlyMemory`` to model - vector data. To learn more, see :ref:`csharp-supported-vector-types` - in the {+vector-search+} guide. + improvements from using ``Memory`` and ``ReadOnlyMemory`` to store + array representations of embeddings and query vectors. To learn more, + see :ref:`csharp-supported-vector-types` in the {+vector-search+} guide. Additional Information ---------------------- diff --git a/source/includes/fundamentals/code-examples/MemorySerialization.cs b/source/includes/fundamentals/code-examples/MemorySerialization.cs index 5ad044ea..d32e8d2b 100644 --- a/source/includes/fundamentals/code-examples/MemorySerialization.cs +++ b/source/includes/fundamentals/code-examples/MemorySerialization.cs @@ -4,7 +4,6 @@ public class Program { - public static void Main(string[] args) { // Replace with your connection string