Skip to content

Commit 3f8c005

Browse files
authored
Merge pull request #85 from r-world-devs/maciekbanas/69/add-vectordatabase-method-purge_records
Add VectorDatabase method to purge records
2 parents ef68b52 + ec757eb commit 3f8c005

File tree

5 files changed

+183
-63
lines changed

5 files changed

+183
-63
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: GitAI
22
Title: Extracts Knowledge From Git Repositories
3-
Version: 0.0.0.9013
3+
Version: 0.0.0.9015
44
Authors@R: c(
55
person("Kamil", "Wais", , "[email protected]", role = c("aut", "cre")),
66
person("Krystian", "Igras", , "[email protected]", role = "aut"),

R/Pinecone.R

Lines changed: 116 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,23 @@ Pinecone <- R6::R6Class(
88
pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")
99

1010
url <- paste0("https://api.pinecone.io/indexes/", private$.index)
11-
12-
httr2::request(url) |>
13-
httr2::req_headers("Api-Key" = pinecone_api_key) |>
14-
httr2::req_perform() |>
11+
12+
httr2::request(url) |>
13+
httr2::req_headers("Api-Key" = pinecone_api_key) |>
14+
httr2::req_perform() |>
1515
httr2::resp_body_json()
16-
},
17-
16+
},
17+
1818
write_record = function(id, text, metadata = list()) {
19-
20-
pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")
21-
22-
url <- paste0("https://", private$.index_host)
23-
19+
20+
pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")
21+
22+
url <- paste0("https://", private$.index_host)
23+
2424
embeddings <- private$.get_embeddings(text = text)
25-
25+
2626
metadata$text <- text
27-
27+
2828
body <- list(
2929
namespace = private$.namespace,
3030
vectors = list(
@@ -33,28 +33,28 @@ Pinecone <- R6::R6Class(
3333
metadata = metadata
3434
)
3535
)
36-
37-
request <- httr2::request(url) |>
38-
httr2::req_url_path_append("vectors/upsert") |>
36+
37+
request <- httr2::request(url) |>
38+
httr2::req_url_path_append("vectors/upsert") |>
3939
httr2::req_headers(
4040
"Api-Key" = pinecone_api_key,
4141
"X-Pinecone-API-Version" = "2024-10"
42-
) |>
43-
httr2::req_body_json(body)
44-
45-
response <- request |>
42+
) |>
43+
httr2::req_body_json(body)
44+
45+
response <- request |>
4646
httr2::req_perform()
47-
47+
4848
response_body <- httr2::resp_body_json(response)
4949
response_body
5050
},
5151

5252
read_record = function(id) {
5353

5454
pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")
55-
55+
5656
url <- paste0("https://", private$.index_host)
57-
57+
5858
request <- httr2::request(url) |>
5959
httr2::req_url_path_append("vectors") |>
6060
httr2::req_url_path_append("fetch") |>
@@ -65,58 +65,115 @@ Pinecone <- R6::R6Class(
6565
httr2::req_headers(
6666
"Api-Key" = pinecone_api_key,
6767
"X-Pinecone-API-Version" = "2024-10"
68-
)
69-
70-
response <- request |>
68+
)
69+
70+
response <- request |>
7171
httr2::req_perform()
72-
72+
7373
response_body <- httr2::resp_body_json(response)
7474
results <- response_body$vectors
75-
76-
results
75+
76+
results
7777
},
7878

79-
79+
8080
find_records = function(query, top_k = 1) {
  # Find the stored records closest to `query`.
  #
  # `query` is embedded with the private `.get_embeddings()` helper and the
  # resulting vector is posted to the index's `query` endpoint.
  # `top_k` is forwarded as the `topK` field of the request body.
  #
  # Returns the `matches` list of the response, with any `values` element
  # removed from each match.

  query_vector <- private$.get_embeddings(query)

  api_key <- Sys.getenv("PINECONE_API_KEY")
  host_url <- paste0("https://", private$.index_host)

  payload <- list(
    namespace = private$.namespace,
    vector = query_vector,
    topK = top_k,
    includeValues = FALSE,
    includeMetadata = TRUE
  )

  query_request <- httr2::request(host_url)
  query_request <- httr2::req_url_path_append(query_request, "query")
  query_request <- httr2::req_headers(
    query_request,
    "Api-Key" = api_key,
    "X-Pinecone-API-Version" = "2024-10"
  )
  query_request <- httr2::req_body_json(query_request, payload)

  query_response <- httr2::req_perform(query_request)
  matches <- httr2::resp_body_json(query_response)$matches

  # Strip the vector values from every match before handing them back.
  purrr::map(matches, function(hit) {
    hit$values <- NULL
    hit
  })
},
116+
117+
list_record_IDs = function() {
  # List the IDs of all records in the current namespace.
  #
  # Calls the index's `vectors/list` endpoint repeatedly, passing the
  # `pagination$next` token of each response to the following request.
  # Paging stops as soon as a response carries no usable `next` token, so a
  # `pagination` element without a token cannot restart the listing from
  # the first page and loop forever.
  #
  # Returns a character vector of record IDs; `character(0)` when the
  # responses contain no vectors.

  pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")

  url <- paste0("https://", private$.index_host)

  # One character vector of IDs per page; flattened once at the end instead
  # of growing a vector with `c()` on every iteration.
  id_pages <- list()
  # NULL on the first request; httr2::req_url_query() is documented to
  # remove parameters whose value is NULL.
  next_token <- NULL

  repeat {

    request <- httr2::request(url) |>
      httr2::req_url_path_append("vectors") |>
      httr2::req_url_path_append("list") |>
      httr2::req_url_query(
        namespace = private$.namespace,
        paginationToken = next_token
      ) |>
      httr2::req_headers(
        "Api-Key" = pinecone_api_key,
        "X-Pinecone-API-Version" = "2024-10"
      )

    response_body <- request |>
      httr2::req_perform() |>
      httr2::resp_body_json()

    # map_chr() yields character(0) for an empty or missing `vectors` list.
    id_pages[[length(id_pages) + 1L]] <- purrr::map_chr(
      response_body$vectors,
      \(vector) vector[["id"]]
    )

    next_token <- response_body$pagination[["next"]]
    if (is.null(next_token) || !nzchar(next_token)) {
      break
    }
  }

  unlist(id_pages, use.names = FALSE)
},
152+
153+
purge_records = function(ids) {
  # Delete the records with the given IDs from the current namespace.
  #
  # `ids` is a character vector (or list) of record IDs, e.g. the result of
  # `list_record_IDs()`.
  #
  # Returns the httr2 response of the `vectors/delete` request, or
  # `invisible(NULL)` without contacting the API when `ids` is empty.

  # Nothing to delete: avoid posting `ids: null` / `ids: []`.
  if (length(ids) == 0) {
    return(invisible(NULL))
  }

  pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")

  url <- paste0("https://", private$.index_host)

  body <- list(
    # httr2::req_body_json() auto-unboxes length-1 vectors by default, which
    # would send a single ID as a JSON string. An unnamed list is always
    # serialised as a JSON array.
    ids = as.list(unname(ids)),
    namespace = private$.namespace
  )

  httr2::request(url) |>
    httr2::req_url_path_append("vectors") |>
    httr2::req_url_path_append("delete") |>
    httr2::req_headers(
      "Api-Key" = pinecone_api_key,
      "X-Pinecone-API-Version" = "2024-10"
    ) |>
    httr2::req_body_json(body) |>
    httr2::req_perform()
}
116173
),
117174

118175
active = list(
119-
176+
120177
namespace = function(value) {
121178
if (missing(value)) return(private$.namespace)
122179
private$.namespace <- value
@@ -127,14 +184,14 @@ Pinecone <- R6::R6Class(
127184
private$.index <- value
128185
}
129186
),
130-
187+
131188
private = list(
132-
189+
133190
.project_id = NULL,
134191
.index = NULL,
135192
.namespace = NULL,
136193
.index_host = NULL,
137-
194+
138195
.initialize = function(index, namespace) {
139196

140197
private$.index <- index
@@ -143,37 +200,37 @@ Pinecone <- R6::R6Class(
143200
},
144201

145202
.get_embeddings = function(text) {
  # Request an embedding for `text` from the `embed` endpoint of
  # https://api.pinecone.io using the "multilingual-e5-large" model.
  #
  # Returns the `values` of the first element of the response's `data`,
  # flattened with `unlist()`.

  api_key <- Sys.getenv("PINECONE_API_KEY")

  embed_parameters <- list(
    input_type = "passage",
    truncate = "END"
  )

  payload <- list(
    model = "multilingual-e5-large",
    parameters = embed_parameters,
    inputs = list(list(text = text))
  )

  embed_request <- httr2::request("https://api.pinecone.io")
  embed_request <- httr2::req_url_path_append(embed_request, "embed")
  embed_request <- httr2::req_headers(
    embed_request,
    "Api-Key" = api_key,
    "X-Pinecone-API-Version" = "2024-10"
  )
  embed_request <- httr2::req_body_json(embed_request, payload)

  embed_response <- httr2::req_perform(embed_request)
  parsed <- httr2::resp_body_json(embed_response)

  # Only a single input is sent, so only the first result is read.
  unlist(parsed$data[[1]]$values)
}
178235
)
179236
)

R/test-helpers.R

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,31 @@ PineconeMocked <- R6::R6Class(
134134
result$values <- NULL
135135
result
136136
})
137+
},
138+
139+
list_record_IDs = function() {
  # Mocked listing of record IDs.
  #
  # A `vectors/list` request is assembled but never performed; the response
  # is built from `test_fixtures[["list_record_IDs"]]` instead.
  #
  # Returns the `id` of every entry in the fixture's `vectors` list.

  api_key <- Sys.getenv("PINECONE_API_KEY")
  host_url <- paste0("https://", private$.index_host)

  list_request <- httr2::request(host_url)
  list_request <- httr2::req_url_path_append(list_request, "vectors")
  list_request <- httr2::req_url_path_append(list_request, "list")
  list_request <- httr2::req_url_query(
    list_request,
    namespace = private$.namespace
  )
  list_request <- httr2::req_headers(
    list_request,
    "Api-Key" = api_key,
    "X-Pinecone-API-Version" = "2024-10"
  )

  mocked_response <- httr2::response_json(
    body = test_fixtures[["list_record_IDs"]]
  )

  listed_vectors <- httr2::resp_body_json(mocked_response)$vectors

  purrr::map_vec(listed_vectors, function(vector) vector$id)
}
138163
),
139164

@@ -235,3 +260,25 @@ test_fixtures[["read_record"]] <- list(
235260
"namespace" = "gitai-tests",
236261
"usage" = list("readUnits" = 1L)
237262
)
263+
264+
# Fixture for the mocked `list_record_IDs()`: a response body holding five
# record IDs, a namespace and a usage entry.
test_fixtures[["list_record_IDs"]] <- list(
  "vectors" = lapply(
    c("project_1", "project_2", "project_3", "project_4", "project_5"),
    function(record_id) list("id" = record_id)
  ),
  "namespace" = "gitai-tests",
  "usage" = list("readUnits" = 1L)
)

inst/example_workflow.R

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
gitai_demo <- initialize_project("gitai-tests") |>
2-
set_database(index = "gitai-mb",
3-
namespace = "gitai-demo-2") |>
1+
gitai_demo <- initialize_project("gitai-demo-2") |>
2+
set_database(index = "gitai-mb") |>
43
set_github_repos(
54
orgs = "r-world-devs"
65
) |>
@@ -13,3 +12,7 @@ process_repos(gitai_demo)
1312
gitai_demo$db$find_records("Find package with which I can plot data.")
1413

1514
gitai_demo$db$read_record("GitStats")
15+
16+
record_ids <- gitai_demo$db$list_record_IDs()
17+
18+
gitai_demo$db$purge_records(record_ids)

tests/testthat/test-Pinecone.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,16 @@ test_that("reading records", {
8181
is.character() |>
8282
expect_true()
8383
})
84+
85+
test_that("listing all records IDs", {
  # PineconeMocked$list_record_IDs() builds its response from a test
  # fixture rather than performing an HTTP request.
  mocked_db <- PineconeMocked$new(
    namespace = "test_project_id",
    index = "gitai"
  )

  record_ids <- mocked_db$list_record_IDs()

  # IDs come back as a character vector with more than one element.
  expect_type(record_ids, "character")
  expect_gt(length(record_ids), 1)
})

0 commit comments

Comments
 (0)