-
Notifications
You must be signed in to change notification settings - Fork 3.7k
/
Copy pathWeaviateVectorStoreCollectionCreateMapping.cs
196 lines (174 loc) · 8.04 KB
/
WeaviateVectorStoreCollectionCreateMapping.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
// Copyright (c) Microsoft. All rights reserved.
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using Microsoft.Extensions.VectorData;
namespace Microsoft.SemanticKernel.Connectors.Weaviate;
/// <summary>
/// Builds the Weaviate collection schema (data properties and named vector configurations)
/// that is sent when a collection is created.
/// More information here: <see href="https://weaviate.io/developers/weaviate/config-refs/schema"/>.
/// </summary>
internal static class WeaviateVectorStoreCollectionCreateMapping
{
    /// <summary>
    /// Maps record type properties to Weaviate collection schema for collection creation.
    /// </summary>
    /// <param name="collectionName">The name of the vector store collection.</param>
    /// <param name="dataProperties">Collection of record data properties.</param>
    /// <param name="vectorProperties">Collection of record vector properties.</param>
    /// <param name="storagePropertyNames">A dictionary that maps from a property name to the storage name that should be used when serializing it to JSON for data and vector properties.</param>
    /// <returns>Weaviate collection schema.</returns>
    /// <remarks>
    /// Every data and vector property is looked up in <paramref name="storagePropertyNames"/> through its indexer,
    /// so a property without an entry surfaces as whatever exception that dictionary's indexer throws.
    /// </remarks>
    /// <exception cref="InvalidOperationException">
    /// A vector property specifies an index kind or a distance function that has no Weaviate equivalent.
    /// </exception>
    public static WeaviateCollectionSchema MapToSchema(
        string collectionName,
        IEnumerable<VectorStoreRecordDataProperty> dataProperties,
        IEnumerable<VectorStoreRecordVectorProperty> vectorProperties,
        IReadOnlyDictionary<string, string> storagePropertyNames)
    {
        var schema = new WeaviateCollectionSchema(collectionName);

        // Handle data properties.
        foreach (var property in dataProperties)
        {
            schema.Properties.Add(new WeaviateCollectionSchemaProperty
            {
                Name = storagePropertyNames[property.DataModelPropertyName],
                DataType = [MapType(property.PropertyType)],
                IndexFilterable = property.IsIndexed,
                IndexSearchable = property.IsFullTextSearchable
            });
        }

        // Handle vector properties. Each one becomes a named vector configuration keyed by its storage name.
        foreach (var property in vectorProperties)
        {
            var vectorPropertyName = storagePropertyNames[property.DataModelPropertyName];

            schema.VectorConfigurations.Add(vectorPropertyName, new WeaviateCollectionSchemaVectorConfig
            {
                VectorIndexType = MapIndexKind(property.IndexKind, vectorPropertyName),
                VectorIndexConfig = new WeaviateCollectionSchemaVectorIndexConfig
                {
                    Distance = MapDistanceFunction(property.DistanceFunction, vectorPropertyName)
                }
            });
        }

        return schema;
    }

    #region private

    /// <summary>
    /// Maps record vector property index kind to Weaviate index kind.
    /// More information here: <see href="https://weaviate.io/developers/weaviate/config-refs/schema/vector-index"/>.
    /// </summary>
    /// <param name="indexKind">Index kind configured on the vector property; null, empty or whitespace selects the default ("hnsw").</param>
    /// <param name="vectorPropertyName">Storage name of the vector property, used only in the error message.</param>
    /// <returns>The Weaviate index type name.</returns>
    /// <exception cref="InvalidOperationException">The index kind is not one of the supported values.</exception>
    private static string MapIndexKind(string? indexKind, string vectorPropertyName)
    {
        const string Hnsw = "hnsw";
        const string Flat = "flat";
        const string Dynamic = "dynamic";

        // If index kind is not provided, use default one.
        if (string.IsNullOrWhiteSpace(indexKind))
        {
            return Hnsw;
        }

        return indexKind switch
        {
            IndexKind.Hnsw => Hnsw,
            IndexKind.Flat => Flat,
            IndexKind.Dynamic => Dynamic,
            _ => throw new InvalidOperationException(
                $"Index kind '{indexKind}' on {nameof(VectorStoreRecordVectorProperty)} '{vectorPropertyName}' is not supported by the Weaviate VectorStore. " +
                $"Supported index kinds: {string.Join(", ", IndexKind.Hnsw, IndexKind.Flat, IndexKind.Dynamic)}")
        };
    }

    /// <summary>
    /// Maps record vector property distance function to Weaviate distance function.
    /// More information here: <see href="https://weaviate.io/developers/weaviate/config-refs/distances"/>.
    /// </summary>
    /// <param name="distanceFunction">Distance function configured on the vector property; null, empty or whitespace selects the default ("cosine").</param>
    /// <param name="vectorPropertyName">Storage name of the vector property, used only in the error message.</param>
    /// <returns>The Weaviate distance metric name.</returns>
    /// <exception cref="InvalidOperationException">The distance function is not one of the supported values.</exception>
    private static string MapDistanceFunction(string? distanceFunction, string vectorPropertyName)
    {
        const string Cosine = "cosine";
        const string Dot = "dot";
        const string EuclideanSquared = "l2-squared";
        const string Hamming = "hamming";
        const string Manhattan = "manhattan";

        // If distance function is not provided, use default one.
        if (string.IsNullOrWhiteSpace(distanceFunction))
        {
            return Cosine;
        }

        return distanceFunction switch
        {
            DistanceFunction.CosineDistance => Cosine,
            DistanceFunction.NegativeDotProductSimilarity => Dot,
            DistanceFunction.EuclideanSquaredDistance => EuclideanSquared,
            DistanceFunction.Hamming => Hamming,
            DistanceFunction.ManhattanDistance => Manhattan,
            _ => throw new InvalidOperationException(
                $"Distance function '{distanceFunction}' on {nameof(VectorStoreRecordVectorProperty)} '{vectorPropertyName}' is not supported by the Weaviate VectorStore. " +
                $"Supported distance functions: {string.Join(", ", DistanceFunction.CosineDistance, DistanceFunction.NegativeDotProductSimilarity, DistanceFunction.EuclideanSquaredDistance, DistanceFunction.Hamming, DistanceFunction.ManhattanDistance)}")
        };
    }

    /// <summary>
    /// Maps record property type to Weaviate data type taking into account if the type is a collection or single value.
    /// </summary>
    /// <param name="type">CLR type of the record data property.</param>
    /// <returns>The Weaviate data type name, with a <c>[]</c> suffix for collection types.</returns>
    private static string MapType(Type type)
    {
        // string implements IEnumerable (of char) but must be treated as a single value.
        var isCollection = type != typeof(string) && typeof(IEnumerable).IsAssignableFrom(type);

        return isCollection
            ? MapType(GetCollectionElementType(type), isCollection: true)
            : MapType(type, isCollection: false);
    }

    /// <summary>
    /// Maps record property type to Weaviate data type.
    /// More information here: <see href="https://weaviate.io/developers/weaviate/config-refs/datatypes"/>.
    /// </summary>
    /// <param name="type">Single value type, or the element type when <paramref name="isCollection"/> is <see langword="true"/>.</param>
    /// <param name="isCollection">Whether the array form of the Weaviate type should be returned.</param>
    /// <returns>The Weaviate data type name; types without a dedicated mapping become <c>object</c> / <c>object[]</c>.</returns>
    private static string MapType(Type type, bool isCollection)
    {
        // Nullable<T> maps exactly like T, so unwrap once instead of listing every T? variant.
        var scalarType = Nullable.GetUnderlyingType(type) ?? type;

        var weaviateType = scalarType switch
        {
            _ when scalarType == typeof(string) => "text",
            _ when scalarType == typeof(int) || scalarType == typeof(long) ||
                   scalarType == typeof(short) || scalarType == typeof(byte) => "int",
            _ when scalarType == typeof(float) || scalarType == typeof(double) ||
                   scalarType == typeof(decimal) => "number",
            _ when scalarType == typeof(DateTime) || scalarType == typeof(DateTimeOffset) => "date",
            _ when scalarType == typeof(Guid) => "uuid",
            _ when scalarType == typeof(bool) => "boolean",
            _ => "object",
        };

        return isCollection ? $"{weaviateType}[]" : weaviateType;
    }

    /// <summary>
    /// Gets the element type of a collection.
    /// </summary>
    /// <remarks>
    /// For example, when <paramref name="type"/> is <see cref="List{T}"/>, returned type will be generic parameter <see langword="T"/>.
    /// For a non-generic type that implements <see cref="IEnumerable{T}"/> (such as a class deriving from <see cref="List{T}"/>),
    /// the interface's type argument is returned. When no element type can be determined, <see cref="object"/> is returned.
    /// </remarks>
    /// <param name="type">Collection type to inspect.</param>
    /// <returns>The element type of the collection, or <see cref="object"/> when it is unknown.</returns>
    private static Type GetCollectionElementType(Type type)
    {
        if (type.IsArray && type.GetElementType() is Type arrayElementType)
        {
            return arrayElementType;
        }

        if (type.IsGenericType)
        {
            return type.GetGenericArguments().First();
        }

        // A non-generic type can still be a typed collection (e.g. "class Tags : List<string>").
        // Use the IEnumerable<T> it implements so such properties are not mapped to "object[]".
        var enumerableInterface = type
            .GetInterfaces()
            .FirstOrDefault(i => i.IsGenericType && i.GetGenericTypeDefinition() == typeof(IEnumerable<>));

        return enumerableInterface?.GetGenericArguments()[0] ?? typeof(object);
    }

    #endregion
}