Skip to content

Commit ab8aab3

Browse files
Vectorsearch providers should utilize the global Langchain.logger (#804)
* Vectorsearch providers should utilize the global Langchain.logger * CHANGELOG entry * wip: updating milvus to newer version * Fix specs * fix linter * Update CHANGELOG.md * updates * Update CHANGELOG.md
1 parent ffe667c commit ab8aab3

File tree

10 files changed

+135
-138
lines changed

10 files changed

+135
-138
lines changed

.env.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ HUGGING_FACE_API_KEY=
1717
LLAMACPP_MODEL_PATH=
1818
LLAMACPP_N_THREADS=
1919
LLAMACPP_N_GPU_LAYERS=
20-
MILVUS_URL=
20+
MILVUS_URL=http://localhost:19530
2121
MISTRAL_AI_API_KEY=
2222
NEWS_API_KEY=
2323
OLLAMA_URL=http://localhost:11434

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
## [Unreleased]
2+
- [BREAKING] Langchain::Vectorsearch::Milvus was rewritten to work with the newer milvus gem (~> 0.10.3)
23
- Assistant can now process image_urls in the messages (currently only for OpenAI and Mistral AI)
4+
- Vectorsearch providers utilize the global Langchain.logger
5+
- Update required milvus, qdrant and weaviate versions
36

47
## [0.16.1] - 2024-09-30
58
- Deprecate Langchain::LLM::GooglePalm

Gemfile.lock

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ GEM
157157
faraday (~> 2.0)
158158
typhoeus (~> 1.4)
159159
ffi (1.16.3)
160+
fiber-storage (1.0.0)
160161
google-cloud-env (2.1.1)
161162
faraday (>= 1.0, < 3.a)
162163
google_palm_api (0.1.3)
@@ -169,12 +170,13 @@ GEM
169170
multi_json (~> 1.11)
170171
os (>= 0.9, < 2.0)
171172
signet (>= 0.16, < 2.a)
172-
graphlient (0.7.0)
173+
graphlient (0.8.0)
173174
faraday (~> 2.0)
174175
graphql-client
175-
graphql (2.3.4)
176+
graphql (2.3.16)
176177
base64
177-
graphql-client (0.22.0)
178+
fiber-storage
179+
graphql-client (0.23.0)
178180
activesupport (>= 3.0)
179181
graphql (>= 1.13.0)
180182
hashdiff (1.1.0)
@@ -211,7 +213,7 @@ GEM
211213
net-smtp
212214
matrix (0.4.2)
213215
method_source (1.1.0)
214-
milvus (0.9.3)
216+
milvus (0.10.3)
215217
faraday (>= 2.0.1, < 3)
216218
mini_mime (1.1.5)
217219
mini_portile2 (2.8.6)
@@ -282,7 +284,7 @@ GEM
282284
psych (5.1.2)
283285
stringio
284286
public_suffix (5.0.5)
285-
qdrant-ruby (0.9.7)
287+
qdrant-ruby (0.9.8)
286288
faraday (>= 2.0.1, < 3)
287289
racc (1.8.0)
288290
rack (3.0.11)
@@ -419,9 +421,9 @@ GEM
419421
parser (>= 3.3.0)
420422
uri (0.13.0)
421423
vcr (6.2.0)
422-
weaviate-ruby (0.8.10)
424+
weaviate-ruby (0.9.2)
423425
faraday (>= 2.0.1, < 3.0)
424-
graphlient (~> 0.7.0)
426+
graphlient (>= 0.7.0, < 0.9.0)
425427
webmock (3.23.1)
426428
addressable (>= 2.8.0)
427429
crack (>= 0.3.2)
@@ -461,7 +463,7 @@ DEPENDENCIES
461463
langchainrb!
462464
llama_cpp (~> 0.9.4)
463465
mail (~> 2.8)
464-
milvus (~> 0.9.3)
466+
milvus (~> 0.10.3)
465467
mistral-ai
466468
nokogiri (~> 1.13)
467469
pdf-reader (~> 2.0)
@@ -470,7 +472,7 @@ DEPENDENCIES
470472
pinecone (~> 0.1.6)
471473
power_point_pptx (~> 0.1.0)
472474
pry-byebug (~> 3.10.0)
473-
qdrant-ruby (~> 0.9.4)
475+
qdrant-ruby (~> 0.9.8)
474476
rake (~> 13.0)
475477
rdiscount (~> 2.2.7)
476478
replicate-ruby (~> 0.2.2)
@@ -483,7 +485,7 @@ DEPENDENCIES
483485
sequel (~> 5.68.0)
484486
standard (>= 1.35.1)
485487
vcr
486-
weaviate-ruby (~> 0.8.10)
488+
weaviate-ruby (~> 0.9.2)
487489
webmock
488490
wikipedia-client (~> 1.17.0)
489491
yard (~> 0.9.34)

langchain.gemspec

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ Gem::Specification.new do |spec|
5757
spec.add_development_dependency "google_search_results", "~> 2.0.0"
5858
spec.add_development_dependency "hnswlib", "~> 0.8.1"
5959
spec.add_development_dependency "hugging-face", "~> 0.3.4"
60-
spec.add_development_dependency "milvus", "~> 0.9.3"
60+
spec.add_development_dependency "milvus", "~> 0.10.3"
6161
spec.add_development_dependency "llama_cpp", "~> 0.9.4"
6262
spec.add_development_dependency "nokogiri", "~> 1.13"
6363
spec.add_development_dependency "mail", "~> 2.8"
@@ -67,13 +67,13 @@ Gem::Specification.new do |spec|
6767
spec.add_development_dependency "pdf-reader", "~> 2.0"
6868
spec.add_development_dependency "pinecone", "~> 0.1.6"
6969
spec.add_development_dependency "replicate-ruby", "~> 0.2.2"
70-
spec.add_development_dependency "qdrant-ruby", "~> 0.9.4"
70+
spec.add_development_dependency "qdrant-ruby", "~> 0.9.8"
7171
spec.add_development_dependency "roo", "~> 2.10.0"
7272
spec.add_development_dependency "roo-xls", "~> 1.2.0"
7373
spec.add_development_dependency "ruby-openai", "~> 7.1.0"
7474
spec.add_development_dependency "safe_ruby", "~> 1.0.4"
7575
spec.add_development_dependency "sequel", "~> 5.68.0"
76-
spec.add_development_dependency "weaviate-ruby", "~> 0.8.10"
76+
spec.add_development_dependency "weaviate-ruby", "~> 0.9.2"
7777
spec.add_development_dependency "wikipedia-client", "~> 1.17.0"
7878
spec.add_development_dependency "power_point_pptx", "~> 0.1.0"
7979
end

lib/langchain/vectorsearch/elasticsearch.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def initialize(url:, index_name:, llm:, api_key: nil, es_options: {})
3737
@options = {
3838
url: url,
3939
request_timeout: 20,
40-
log: false
40+
logger: Langchain.logger
4141
}.merge(es_options)
4242

4343
@es_client = ::Elasticsearch::Client.new(**options)

lib/langchain/vectorsearch/milvus.rb

Lines changed: 45 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,18 @@ class Milvus < Base
66
# Wrapper around Milvus REST APIs.
77
#
88
# Gem requirements:
9-
# gem "milvus", "~> 0.9.3"
9+
# gem "milvus", "~> 0.10.3"
1010
#
1111
# Usage:
12-
# milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, api_key:)
12+
# milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, api_key:)
1313
#
14-
1514
def initialize(url:, index_name:, llm:, api_key: nil)
1615
depends_on "milvus"
1716

18-
@client = ::Milvus::Client.new(url: url)
17+
@client = ::Milvus::Client.new(
18+
url: url,
19+
logger: Langchain.logger
20+
)
1921
@index_name = index_name
2022

2123
super(llm: llm)
@@ -24,33 +26,24 @@ def initialize(url:, index_name:, llm:, api_key: nil)
2426
def add_texts(texts:)
2527
client.entities.insert(
2628
collection_name: index_name,
27-
num_rows: Array(texts).size,
28-
fields_data: [
29-
{
30-
field_name: "content",
31-
type: ::Milvus::DATA_TYPES["varchar"],
32-
field: Array(texts)
33-
}, {
34-
field_name: "vectors",
35-
type: ::Milvus::DATA_TYPES["float_vector"],
36-
field: Array(texts).map { |text| llm.embed(text: text).embedding }
37-
}
38-
]
29+
data: texts.map do |text|
30+
{content: text, vector: llm.embed(text: text).embedding}
31+
end
3932
)
4033
end
4134

35+
# TODO: Add update_texts method
36+
4237
# Deletes a list of texts in the index
4338
#
4439
# @param ids [Array<Integer>] The ids of texts to delete
4540
# @return [Boolean] The response from the server
4641
def remove_texts(ids:)
4742
raise ArgumentError, "ids must be an array" unless ids.is_a?(Array)
48-
# Convert ids to integers if strings are passed
49-
ids = ids.map(&:to_i)
5043

5144
client.entities.delete(
5245
collection_name: index_name,
53-
expression: "id in #{ids}"
46+
filter: "id in #{ids}"
5447
)
5548
end
5649

@@ -62,33 +55,25 @@ def create_default_schema
6255
client.collections.create(
6356
auto_id: true,
6457
collection_name: index_name,
65-
description: "Default schema created by langchain.rb",
6658
fields: [
6759
{
68-
name: "id",
69-
is_primary_key: true,
70-
autoID: true,
71-
data_type: ::Milvus::DATA_TYPES["int64"]
60+
fieldName: "id",
61+
isPrimary: true,
62+
dataType: "Int64"
7263
}, {
73-
name: "content",
74-
is_primary_key: false,
75-
data_type: ::Milvus::DATA_TYPES["varchar"],
76-
type_params: [
77-
{
78-
key: "max_length",
79-
value: "32768" # Largest allowed value
80-
}
81-
]
64+
fieldName: "content",
65+
isPrimary: false,
66+
dataType: "VarChar",
67+
elementTypeParams: {
68+
max_length: "32768" # Largest allowed value
69+
}
8270
}, {
83-
name: "vectors",
84-
data_type: ::Milvus::DATA_TYPES["float_vector"],
85-
is_primary_key: false,
86-
type_params: [
87-
{
88-
key: "dim",
89-
value: llm.default_dimensions.to_s
90-
}
91-
]
71+
fieldName: "vector",
72+
isPrimary: false,
73+
dataType: "FloatVector",
74+
elementTypeParams: {
75+
dim: llm.default_dimensions.to_s
76+
}
9277
}
9378
]
9479
)
@@ -97,27 +82,31 @@ def create_default_schema
9782
# Create the default index
9883
# @return [Boolean] The response from the server
9984
def create_default_index
100-
client.indices.create(
85+
client.indexes.create(
10186
collection_name: index_name,
102-
field_name: "vectors",
103-
extra_params: [
104-
{key: "metric_type", value: "L2"},
105-
{key: "index_type", value: "IVF_FLAT"},
106-
{key: "params", value: "{\"nlist\":1024}"}
87+
index_params: [
88+
{
89+
metricType: "L2",
90+
fieldName: "vector",
91+
indexName: "vector_idx",
92+
indexConfig: {
93+
index_type: "AUTOINDEX"
94+
}
95+
}
10796
]
10897
)
10998
end
11099

111100
# Get the default schema
112101
# @return [Hash] The response from the server
113102
def get_default_schema
114-
client.collections.get(collection_name: index_name)
103+
client.collections.describe(collection_name: index_name)
115104
end
116105

117106
# Delete default schema
118107
# @return [Hash] The response from the server
119108
def destroy_default_schema
120-
client.collections.delete(collection_name: index_name)
109+
client.collections.drop(collection_name: index_name)
121110
end
122111

123112
# Load default schema into memory
@@ -138,16 +127,12 @@ def similarity_search(query:, k: 4)
138127
def similarity_search_by_vector(embedding:, k: 4)
139128
load_default_schema
140129

141-
client.search(
130+
client.entities.search(
142131
collection_name: index_name,
143-
output_fields: ["id", "content"], # Add "vectors" if need to have full vectors returned.
144-
top_k: k.to_s,
145-
vectors: [embedding],
146-
dsl_type: 1,
147-
params: "{\"nprobe\": 10}",
148-
anns_field: "vectors",
149-
metric_type: "L2",
150-
vector_type: ::Milvus::DATA_TYPES["float_vector"]
132+
anns_field: "vector",
133+
data: [embedding],
134+
limit: k,
135+
output_fields: ["content", "id", "vector"]
151136
)
152137
end
153138

@@ -159,8 +144,7 @@ def similarity_search_by_vector(embedding:, k: 4)
159144
def ask(question:, k: 4, &block)
160145
search_results = similarity_search(query: question, k: k)
161146

162-
content_field = search_results.dig("results", "fields_data").select { |field| field.dig("field_name") == "content" }
163-
content_data = content_field.first.dig("Field", "Scalars", "Data", "StringData", "data")
147+
content_data = search_results.dig("data").map { |result| result.dig("content") }
164148

165149
context = content_data.join("\n---\n")
166150

lib/langchain/vectorsearch/qdrant.rb

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ class Qdrant < Base
66
# Wrapper around Qdrant
77
#
88
# Gem requirements:
9-
# gem "qdrant-ruby", "~> 0.9.3"
9+
# gem "qdrant-ruby", "~> 0.9.8"
1010
#
1111
# Usage:
1212
# qdrant = Langchain::Vectorsearch::Qdrant.new(url:, api_key:, index_name:, llm:)
@@ -22,7 +22,8 @@ def initialize(url:, api_key:, index_name:, llm:)
2222

2323
@client = ::Qdrant::Client.new(
2424
url: url,
25-
api_key: api_key
25+
api_key: api_key,
26+
logger: Langchain.logger
2627
)
2728
@index_name = index_name
2829

lib/langchain/vectorsearch/weaviate.rb

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ class Weaviate < Base
66
# Wrapper around Weaviate
77
#
88
# Gem requirements:
9-
# gem "weaviate-ruby", "~> 0.9.0"
9+
# gem "weaviate-ruby", "~> 0.9.2"
1010
#
1111
# Usage:
1212
# weaviate = Langchain::Vectorsearch::Weaviate.new(url: ENV["WEAVIATE_URL"], api_key: ENV["WEAVIATE_API_KEY"], index_name: "Docs", llm: llm)
@@ -22,7 +22,8 @@ def initialize(url:, index_name:, llm:, api_key: nil)
2222

2323
@client = ::Weaviate::Client.new(
2424
url: url,
25-
api_key: api_key
25+
api_key: api_key,
26+
logger: Langchain.logger
2627
)
2728

2829
# Weaviate requires the class name to be Capitalized: https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class

0 commit comments

Comments
 (0)