# query_combined.py - forked from redis/redis-py (124 lines, 108 loc, 3.04 KB)
# EXAMPLE: query_combined
# HIDE_START
import json
import numpy as np
import redis
import warnings
from redis.commands.json.path import Path
from redis.commands.search.field import NumericField, TagField, TextField, VectorField
from redis.commands.search.index_definition import IndexDefinition, IndexType
from redis.commands.search.query import Query
from sentence_transformers import SentenceTransformer
def embed_text(model, text: str) -> bytes:
    """Encode *text* with *model* and return the embedding as raw float32 bytes.

    Args:
        model: Any object exposing ``encode(text)`` that returns an
            array-like of numbers (a ``SentenceTransformer`` in this script).
        text: The text to embed.

    Returns:
        The embedding converted to ``float32`` and serialized with
        ``ndarray.tobytes()``, i.e. 4 bytes per component.
    """
    # Convert to float32 in one step; the index in this file declares the
    # vector field as FLOAT32, so the query vector must use the same width.
    embedding = np.asarray(model.encode(text), dtype=np.float32)
    return embedding.tobytes()
# Ignore FutureWarning messages that mention clean_up_tokenization_spaces;
# the filter is installed before the model is constructed.
warnings.filterwarnings(
    "ignore",
    category=FutureWarning,
    message=r".*clean_up_tokenization_spaces.*",
)

# Embedding model used to turn the free-text query into a vector.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Text of the similarity query and its float32 byte representation, later
# passed to the KNN query as the $query_vector parameter.
query = "Bike for small kids"
query_vector = embed_text(model, query)

# No host/port is given, so the client's default connection settings apply.
r = redis.Redis(decode_responses=True)
# create index
# Four fields are indexed from each JSON document: a text field, a tag field,
# a numeric field and a vector field, each exposed under an alias.
# NOTE(review): $.description is aliased as "model", while the combined4 and
# combined5 queries in this file filter on @description - confirm the alias.
schema = (
    TextField("$.description", as_name="model", no_stem=True),
    TagField("$.condition", as_name="condition"),
    NumericField("$.price", as_name="price"),
    VectorField(
        "$.description_embeddings",
        "FLAT",
        # DIM has to equal the length of the stored embeddings - presumably
        # 384 is the output size of the model used for query_vector; confirm.
        {"TYPE": "FLOAT32", "DIM": 384, "DISTANCE_METRIC": "COSINE"},
        as_name="vector",
    ),
)

# Handle for the "idx:bicycle" index; every query below goes through it.
index = r.ft("idx:bicycle")

# Index JSON documents whose key starts with "bicycle:".
definition = IndexDefinition(index_type=IndexType.JSON, prefix=["bicycle:"])
index.create_index(schema, definition=definition)
# load data
# Assumes the file holds a JSON array of bicycle documents - confirm against
# data/query_vector.json. The encoding is given explicitly so the read does
# not depend on the platform's locale default.
with open("data/query_vector.json", encoding="utf-8") as f:
    bicycles = json.load(f)

# Queue one JSON.SET per document on a non-transactional pipeline and send
# them all with a single execute(). Keys are "bicycle:<position>", which
# matches the "bicycle:" prefix the index definition in this file watches.
pipeline = r.pipeline(transaction=False)
for bid, bicycle in enumerate(bicycles):
    pipeline.json().set(f"bicycle:{bid}", Path.root_path(), bicycle)
pipeline.execute()
# HIDE_END
# STEP_START combined1
# Both clauses must hold: price within [500 1000] and the condition tag "new".
res = index.search(Query("@price:[500 1000] @condition:{new}"))
print(res.total)  # >>> 1
# REMOVE_START
assert res.total == 1
# REMOVE_END
# STEP_END
# STEP_START combined2
# Full-text term "kids" combined with a price range and the "used" tag.
res = index.search(Query("kids @price:[500 1000] @condition:{used}"))
print(res.total)  # >>> 1
# REMOVE_START
assert res.total == 1
# REMOVE_END
# STEP_END
# STEP_START combined3
# Either term ("kids" or "small") may match, and the tag must be "used".
res = index.search(Query("(kids | small) @condition:{used}"))
print(res.total)  # >>> 2
# REMOVE_START
assert res.total == 2
# REMOVE_END
# STEP_END
# STEP_START combined4
# Same union of terms, but restricted to the @description field.
res = index.search(Query("@description:(kids | small) @condition:{used}"))
print(res.total)  # >>> 0
# REMOVE_START
# NOTE(review): the schema in this file aliases $.description as "model", not
# "description" - presumably why this query matches nothing; confirm.
assert res.total == 0
# REMOVE_END
# STEP_END
# STEP_START combined5
# Union of terms on @description together with a union of tags (new or used).
res = index.search(Query("@description:(kids | small) @condition:{new | used}"))
print(res.total)  # >>> 0
# REMOVE_START
# NOTE(review): the schema in this file aliases $.description as "model", not
# "description" - presumably why this query matches nothing; confirm.
assert res.total == 0
# REMOVE_END
# STEP_END
# STEP_START combined6
# Price within [500 1000], excluding documents whose condition tag is "new".
res = index.search(Query("@price:[500 1000] -@condition:{new}"))
print(res.total)  # >>> 2
# REMOVE_START
assert res.total == 2
# REMOVE_END
# STEP_END
# STEP_START combined7
# The parenthesised filter selects the candidate documents; the KNN clause
# then asks for the 3 nearest neighbours of $query_vector on @vector.
# Dialect 2 is set because the query uses vector search with a parameter.
res = index.search(
    Query(
        "(@price:[500 1000] -@condition:{new})=>[KNN 3 @vector $query_vector]"
    ).dialect(2),
    query_params={"query_vector": query_vector},
)
print(res.total)  # >>> 2
# REMOVE_START
assert res.total == 2
# REMOVE_END
# STEP_END
# REMOVE_START
# Drop the "idx:bicycle" index and delete the documents it indexed.
index.dropindex(delete_documents=True)
# REMOVE_END