-
Notifications
You must be signed in to change notification settings - Fork 3.7k
/
Copy pathVectorStore_EmbeddingGeneration.cs
120 lines (104 loc) · 5.58 KB
/
VectorStore_EmbeddingGeneration.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
// Copyright (c) Microsoft. All rights reserved.
using Azure.Identity;
using Memory.VectorStoreEmbeddingGeneration;
using Microsoft.Extensions.VectorData;
using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
using Microsoft.SemanticKernel.Connectors.InMemory;
namespace Memory;
/// <summary>
/// This sample shows how to abstract embedding generation away from usage by
/// using the decorator pattern.
///
/// In the sample we create an <see cref="InMemoryVectorStore"/> and then, using
/// the extension method <see cref="TextEmbeddingVectorStoreExtensions.UseTextEmbeddingGeneration(IVectorStore, Microsoft.SemanticKernel.Embeddings.ITextEmbeddingGenerationService)"/>,
/// we wrap the <see cref="InMemoryVectorStore"/> with a <see cref="TextEmbeddingVectorStore"/> that will automatically generate embeddings for properties
/// that have the <see cref="GenerateTextEmbeddingAttribute"/> attribute.
///
/// The decorated vector store also adds the additional <see cref="IVectorizableTextSearch{TRecord}"/> interface to the collection,
/// which allows us to search the collection using a text string without having to manually generate the embeddings.
///
/// Note that the <see cref="TextEmbeddingVectorStore"/> demonstrated here is part of this sample and not part of the Semantic Kernel libraries.
/// To use it, you will need to copy it to your own project.
/// </summary>
public class VectorStore_EmbeddingGeneration(ITestOutputHelper output) : BaseTest(output)
{
    /// <summary>
    /// Upserts a few glossary entries into a decorated in-memory vector store and then
    /// searches them with a plain text query, letting the decorator produce the embeddings.
    /// </summary>
    [Fact]
    public async Task UseEmbeddingGenerationViaDecoratorAsync()
    {
        // Create an embedding generation service.
        var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService(
            TestConfiguration.AzureOpenAIEmbeddings.DeploymentName,
            TestConfiguration.AzureOpenAIEmbeddings.Endpoint,
            new AzureCliCredential());

        // Construct an InMemory vector store with embedding generation.
        // The UseTextEmbeddingGeneration method adds an embedding generation
        // decorator class to the vector store that will automatically generate
        // embeddings for properties that are decorated with the GenerateTextEmbeddingAttribute.
        var vectorStore = new InMemoryVectorStore().UseTextEmbeddingGeneration(textEmbeddingGenerationService);

        // Get and create collection if it doesn't exist.
        var collection = vectorStore.GetCollection<ulong, Glossary>("skglossary");
        await collection.CreateCollectionIfNotExistsAsync();

        // Create and upsert glossary entries into the collection.
        // ToListAsync enumerates the returned async sequence to completion before we search.
        await collection.UpsertBatchAsync(CreateGlossaryEntries()).ToListAsync();

        // Search the collection using a vectorizable text search.
        // A direct cast is used instead of `as` + `!` so that, should the collection not
        // implement the interface, the failure is an InvalidCastException raised right here
        // rather than a NullReferenceException on the following call.
        var search = (IVectorizableTextSearch<Glossary>)collection;
        var searchString = "What is an Application Programming Interface";
        var searchResult = await search.VectorizableTextSearchAsync(searchString, new() { Top = 1 });
        var resultRecords = await searchResult.Results.ToListAsync();

        Console.WriteLine($"Search string: {searchString}");
        Console.WriteLine($"Result: {resultRecords.First().Record.Definition}");
        Console.WriteLine();
    }

    /// <summary>
    /// Sample model class that represents a glossary entry.
    /// </summary>
    /// <remarks>
    /// Note that each property is decorated with an attribute that specifies how the property should be treated by the vector store.
    /// This allows us to create a collection in the vector store and upsert and retrieve instances of this class without any further configuration.
    ///
    /// The <see cref="Glossary.DefinitionEmbedding"/> property is also decorated with the <see cref="GenerateTextEmbeddingAttribute"/> attribute which
    /// allows the vector store to automatically generate an embedding for the property when the record is upserted.
    /// </remarks>
    private sealed class Glossary
    {
        /// <summary>Unique key of the glossary entry.</summary>
        [VectorStoreRecordKey]
        public ulong Key { get; set; }

        /// <summary>Category the term belongs to; marked as indexed in the store.</summary>
        [VectorStoreRecordData(IsIndexed = true)]
        public string Category { get; set; }

        /// <summary>The term being defined.</summary>
        [VectorStoreRecordData]
        public string Term { get; set; }

        /// <summary>Human-readable definition of the term; the source text for the embedding.</summary>
        [VectorStoreRecordData]
        public string Definition { get; set; }

        /// <summary>
        /// Embedding of <see cref="Definition"/>. It is never assigned in this sample; the
        /// <see cref="GenerateTextEmbeddingAttribute"/> names <see cref="Definition"/> as its source.
        /// </summary>
        // NOTE(review): 1536 presumably has to match the output size of the configured embedding deployment - confirm.
        [GenerateTextEmbedding(nameof(Definition))]
        [VectorStoreRecordVector(1536)]
        public ReadOnlyMemory<float> DefinitionEmbedding { get; set; }
    }

    /// <summary>
    /// Create some sample glossary entries.
    /// </summary>
    /// <returns>A lazily evaluated sequence of three sample glossary entries.</returns>
    private static IEnumerable<Glossary> CreateGlossaryEntries()
    {
        yield return new Glossary
        {
            Key = 1,
            Category = "External Definitions",
            Term = "API",
            Definition = "Application Programming Interface. A set of rules and specifications that allow software components to communicate and exchange data."
        };

        yield return new Glossary
        {
            Key = 2,
            Category = "Core Definitions",
            Term = "Connectors",
            Definition = "Connectors allow you to integrate with various services provide AI capabilities, including LLM, AudioToText, TextToAudio, Embedding generation, etc."
        };

        yield return new Glossary
        {
            Key = 3,
            Category = "External Definitions",
            Term = "RAG",
            Definition = "Retrieval Augmented Generation - a term that refers to the process of retrieving additional data to provide as context to an LLM to use when generating a response (completion) to a user’s question (prompt)."
        };
    }
}