// EmbeddedContent.ts
import { Page, PersistedPage } from ".";
import { VectorStore } from "../VectorStore";
/**
 * A chunk of text, together with its embedded content, as stored in the
 * database.
 */
export interface EmbeddedContent {
  /** URL of the page that this content belongs to. */
  url: string;

  /** Name of the data source from which the page was loaded. */
  sourceName: string;

  /** The text that the vector embedding represents. */
  text: string;

  /** How many embedding tokens the content contains. */
  tokenCount: number;

  /**
   * Vector embeddings of the text, stored as numeric arrays under string
   * keys.
   */
  embeddings: Record<string, number[]>;

  /** When the content was last updated. */
  updated: Date;

  /**
   * Arbitrary metadata attached to the content. When the content text
   * carries Front Matter metadata, this object should match it.
   */
  metadata?: {
    pageTitle?: string;
    tags?: string[];
    [k: string]: unknown;
  };

  /**
   * Position of this chunk, when the content was chunked from a larger page.
   */
  chunkIndex?: number;

  /**
   * Non-cryptographic hash of the actual chunking function (and its
   * options) that produced this chunk. It is used to detect whether the
   * chunk should be updated because the function or its options changed.
   */
  chunkAlgoHash?: string;
}
/**
 * Arguments that select which embedded content to delete.
 */
export type DeleteEmbeddedContentArgs = {
  /** Page whose embedded content should be deleted. */
  page?: Page;

  /** Names of the data sources whose embedded content should be deleted. */
  dataSources?: string[];

  /**
   * When true, delete pages that do NOT match the data sources in the
   * query.
   */
  inverseDataSources?: boolean;
};
/**
 * Match parameters for a data-source query.
 */
export interface GetSourcesMatchParams {
  /**
   * Data source names to match on.
   * NOTE(review): presumably omitting this leaves the source name
   * unconstrained — confirm against the store implementations.
   */
  sourceNames?: string[];

  /**
   * Condition on the chunking-algorithm hash.
   * NOTE(review): presumably `hashValue` is compared against the stored
   * `chunkAlgoHash`, with `operation` choosing equality or inequality —
   * confirm against the store implementations.
   */
  chunkAlgoHash: {
    hashValue: string;
    operation: "equals" | "notEquals";
  };
}
/**
 * Data store holding the embedded content. It combines a
 * `VectorStore<EmbeddedContent>` with the members declared below.
 */
export type EmbeddedContentStore = VectorStore<EmbeddedContent> & {
  /** Load the embedded content belonging to the given page. */
  loadEmbeddedContent(args: { page: Page }): Promise<EmbeddedContent[]>;

  /**
   * Delete all embedded content for the given page and/or data sources.
   */
  deleteEmbeddedContent(args: DeleteEmbeddedContentArgs): Promise<void>;

  /**
   * Replace all embedded content of the given page with the supplied
   * embedded content.
   */
  updateEmbeddedContent(args: {
    page: Page;
    embeddedContent: EmbeddedContent[];
  }): Promise<void>;

  /** Optional hook that closes the connection to the data store. */
  close?: () => Promise<void>;

  /**
   * Additional implementation-specific metadata about the store.
   */
  metadata: {
    embeddingName: string;
    [k: string]: unknown;
  };

  /** Optional hook that initializes the store. */
  init?: () => Promise<void>;

  /** Get the data sources that match the given query. */
  getDataSources(matchQuery: GetSourcesMatchParams): Promise<string[]>;
};