Skip to content

Commit ef68b52

Browse files
authored
Merge pull request #83 from r-world-devs/maciekbanas/81/prepare-mocks-for-testing-vectordatabase
Maciekbanas/81/prepare mocks for testing vectordatabase
2 parents 15d4abb + c973881 commit ef68b52

File tree

5 files changed

+244
-22
lines changed

5 files changed

+244
-22
lines changed

DESCRIPTION

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: GitAI
22
Title: Extracts Knowledge From Git Repositories
3-
Version: 0.0.0.9012
3+
Version: 0.0.0.9013
44
Authors@R: c(
55
person("Kamil", "Wais", , "[email protected]", role = c("aut", "cre")),
66
person("Krystian", "Igras", , "[email protected]", role = "aut"),
@@ -30,4 +30,3 @@ Suggests:
3030
shiny,
3131
withr
3232
Config/testthat/edition: 3
33-
Config/testthat/parallel: true

R/test-helpers.R

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,219 @@ Mocker <- R6::R6Class(
1919
}
2020
)
2121
)
22+
23+
# Offline stand-in for the `Pinecone` class, used in tests.
#
# Every method below answers from the canned payloads stored in
# `test_fixtures` rather than talking to the Pinecone HTTP API. Each payload is
# wrapped with `httr2::response_json()` and read back with
# `httr2::resp_body_json()`, so callers receive the fixture after a JSON
# round-trip rather than the raw R list.
#
# No request is built and `PINECONE_API_KEY` is not read: nothing is ever sent,
# so the mock does not depend on credentials or on private fields of the parent
# class.
PineconeMocked <- R6::R6Class(
  "PineconeMocked",
  inherit = Pinecone,
  public = list(

    # Return the canned index description
    # (`test_fixtures[["pinecone_index_response"]]`) as a parsed JSON body.
    get_index_metadata = function() {
      response <- httr2::response_json(
        body = test_fixtures[["pinecone_index_response"]]
      )
      httr2::resp_body_json(response)
    },

    # Pretend to upsert a single record.
    #
    # `id`, `text` and `metadata` are accepted for signature compatibility
    # with the real method but are not used: the mock always reports that
    # exactly one vector was upserted.
    #
    # Returns a list with a single element `upsertedCount`.
    write_record = function(id, text, metadata = list()) {
      response <- httr2::response_json(
        body = list("upsertedCount" = 1)
      )
      httr2::resp_body_json(response)
    },

    # Return the canned fetch result.
    #
    # `id` is accepted for signature compatibility but is not used; the
    # `vectors` element of `test_fixtures[["read_record"]]` is returned
    # whatever id is requested.
    read_record = function(id) {
      response <- httr2::response_json(
        body = test_fixtures[["read_record"]]
      )
      httr2::resp_body_json(response)$vectors
    },

    # Return the canned query matches, without their `values` element.
    #
    # `query` is accepted for signature compatibility but is not used.
    # `top_k` caps the number of matches returned, so the mock never hands
    # back more records than the caller asked for.
    find_records = function(query, top_k = 1) {
      response <- httr2::response_json(
        body = test_fixtures[["matched_records"]]
      )
      matches <- httr2::resp_body_json(response)$matches

      # Keep at most `top_k` matches; never index past the fixture length.
      keep <- seq_len(max(0, min(top_k, length(matches))))

      purrr::map(matches[keep], function(match) {
        match$values <- NULL
        match
      })
    }
  ),

  private = list(

    # Return the canned embedding as a plain vector.
    #
    # `text` is accepted for signature compatibility but is not used; the
    # vector always comes from `test_fixtures[["embeddings"]]`.
    .get_embeddings = function(text) {
      response <- httr2::response_json(
        body = test_fixtures[["embeddings"]]
      )
      response_body <- httr2::resp_body_json(response)

      # `unlist()` flattens the parsed JSON array into an atomic vector.
      unlist(response_body$data[[1]]$values)
    }
  )
)
175+
176+
# Canned payloads returned by `PineconeMocked` in place of live Pinecone
# responses. Each entry is a plain R list that the mock serialises to JSON.
test_fixtures <- list()

# Index description returned by `get_index_metadata()`.
test_fixtures[["pinecone_index_response"]] <- list(
  "name" = "gitai",
  "metric" = "cosine",
  "dimension" = 1024L,
  "status" = list(
    "ready" = TRUE,
    "state" = "Ready"
  ),
  "host" = "gitai-test-host",
  "spec" = list(
    "serverless" = list(
      "region" = "us-east-1",
      "cloud" = "aws"
    )
  )
)

# Embedding payload. `values` is a flat list of 1024 numbers so that it
# serialises to a single JSON array; an extra wrapping `list()` would turn it
# into an array containing one array.
# NOTE(review): the numbers are random and drawn once, when this file is
# evaluated; tests should not depend on particular values.
test_fixtures[["embeddings"]] <- list(
  "model" = "multilingual-e5-large",
  "data" = list(
    list(
      "values" = as.list(runif(1024L, -1, 1))
    )
  ),
  "usage" = list(
    "total_tokens" = 78L
  )
)

# Query payload with a single match.
test_fixtures[["matched_records"]] <- list(
  "results" = list(),
  "matches" = list(
    list(
      "id" = "id_2",
      "score" = 0.820673,
      "values" = list(),
      "metadata" = list(
        "files" = c("test_file1", "test_file2"),
        "repo_url" = "test_url",
        "text" = "This package will best suite you.",
        "timestamp" = Sys.Date()
      )
    )
  ),
  "namespace" = "gitai-tests",
  "usage" = list("readUnits" = 10L)
)

# Fetch payload for one stored record, reusing the embedding values and the
# metadata defined above.
test_fixtures[["read_record"]] <- list(
  "vectors" = list(
    "TestProject" = list(
      # `[[` extracts the values themselves; `[` would keep a wrapping
      # one-element list named "values" and nest them one level too deep.
      "values" = test_fixtures[["embeddings"]][["data"]][[1]][["values"]],
      "metadata" = test_fixtures[["matched_records"]][["matches"]][[1]][["metadata"]]
    )
  ),
  "namespace" = "gitai-tests",
  "usage" = list("readUnits" = 1L)
)

inst/example_workflow.R

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Example workflow: configure a project step by step, process its
# repositories, then query the resulting records.

# Project set-up: each call takes the project object as its first argument
# and its result is stored back into `gitai_demo`.
gitai_demo <- initialize_project("gitai-tests")
gitai_demo <- set_database(
  gitai_demo,
  index = "gitai-mb",
  namespace = "gitai-demo-2"
)
gitai_demo <- set_github_repos(
  gitai_demo,
  orgs = "r-world-devs"
)
gitai_demo <- add_files(gitai_demo, files = "\\.md")
gitai_demo <- set_llm(gitai_demo)
gitai_demo <- set_prompt(
  gitai_demo,
  "Provide a one-two sentence description of the product based on input."
)

# Run the configured project over its repositories.
process_repos(gitai_demo)

# Search the database with a free-text query.
gitai_demo$db$find_records("Find package with which I can plot data.")

# Read back a single record by id.
gitai_demo$db$read_record("GitStats")

tests/testthat/test-Pinecone.R

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
test_that("getting index metadata", {
22

3-
db <- Pinecone$new(
3+
db <- PineconeMocked$new(
44
namespace = "test_project_id",
55
index = "gitai"
66
)
@@ -11,7 +11,7 @@ test_that("getting index metadata", {
1111

1212
test_that("getting embeddings", {
1313

14-
db <- Pinecone$new(
14+
db <- PineconeMocked$new(
1515
namespace = "test_project_id",
1616
index = "gitai"
1717
)
@@ -24,7 +24,7 @@ test_that("getting embeddings", {
2424

2525
test_that("writting records", {
2626

27-
db <- Pinecone$new(
27+
db <- PineconeMocked$new(
2828
namespace = "test_project_id",
2929
index = "gitai"
3030
)
@@ -51,9 +51,7 @@ test_that("writting records", {
5151

5252
test_that("finding records", {
5353

54-
Sys.sleep(3)
55-
56-
db <- Pinecone$new(
54+
db <- PineconeMocked$new(
5755
namespace = "test_project_id",
5856
index = "gitai"
5957
)
@@ -68,17 +66,11 @@ test_that("finding records", {
6866
result[[1]]$metadata$text |> is.character() |> expect_true()
6967
result[[1]]$score |> is.numeric() |> expect_true()
7068

71-
result_2 <- db$find_records(
72-
query = "Tell me about apple fruit.",
73-
top_k = 1
74-
)
75-
76-
expect_false(result_2[[1]]$id == result[[1]]$id)
7769
})
7870

7971
test_that("reading records", {
8072

81-
db <- Pinecone$new(
73+
db <- PineconeMocked$new(
8274
namespace = "test_project_id",
8375
index = "gitai"
8476
)

tests/testthat/test-set_database.R

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
test_that("setting database provider with default namespace", {
2-
2+
33
gitai <- initialize_project("gitai-demo") |>
44
set_database(
5-
provider = "Pinecone",
5+
provider = "PineconeMocked",
66
index = "gitai"
7-
)
8-
7+
)
8+
99
gitai$db$index |> expect_equal("gitai")
1010
gitai$db$namespace |> expect_equal("gitai-demo")
1111
})
@@ -14,11 +14,11 @@ test_that("setting database provider with custom namepsace", {
1414

1515
gitai <- initialize_project("gitai-demo") |>
1616
set_database(
17-
provider = "Pinecone",
17+
provider = "PineconeMocked",
1818
index = "gitai",
1919
namespace = "test_namespace"
20-
)
21-
20+
)
21+
2222
gitai$db$index |> expect_equal("gitai")
2323
gitai$db$namespace |> expect_equal("test_namespace")
2424
})

0 commit comments

Comments
 (0)