-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathapp_example.py
78 lines (67 loc) · 2.48 KB
/
app_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
from flask import Flask, flash, render_template, request, send_from_directory
from sentence_transformers import SentenceTransformer
from collections import Counter
import hybrid_pinecone_client
import mmh3
from nltk.tokenize import word_tokenize
from nltk.stem import SnowballStemmer
# Configure and connect to Pinecone index.
# Each setting is read from the environment when the variable is set, so real
# credentials do not have to be written into this file. The original
# placeholder strings are kept as fallbacks, so editing them in place still
# works exactly as before.
api_key = os.environ.get("PINECONE_API_KEY", "YOUR_API_KEY")
pinecone_env = os.environ.get("PINECONE_ENVIRONMENT", "YOUR_ENVIRONMENT")
index_name = os.environ.get("PINECONE_INDEX_NAME", "YOUR_INDEX_NAME")

# Module-level client; connect_index() is called once at import time and the
# same object is later used to issue queries.
pinecone = hybrid_pinecone_client.HybridPinecone(api_key, pinecone_env)
pinecone.connect_index(index_name)
# Flask settings
app = Flask(__name__)
# flash() keeps its messages in the session, which Flask signs with
# SECRET_KEY. Prefer a value supplied through the environment so the real key
# is not committed; the original placeholder remains the fallback.
app.config['SECRET_KEY'] = os.environ.get('FLASK_SECRET_KEY', 'YOUR_SECRET_KEY')
# Flask route to include favicon
@app.route('/favicon.ico')
def favicon():
    """Serve ``favicon.ico`` from the ``static`` folder under the app root."""
    static_dir = os.path.join(app.root_path, 'static')
    return send_from_directory(
        static_dir,
        'favicon.ico',
        mimetype='image/vnd.microsoft.icon',
    )
# Flask route to perform search and render results
_DEFAULT_ALPHA = 0.3  # alpha used when the form field is left blank
_DEFAULT_TOP_K = 20   # number of results requested per search


@app.route('/', methods=('GET', 'POST'))
def index():
    """Render the search page and, on POST, run a hybrid search.

    GET requests render ``index.html`` with ``initialPage=True``.

    POST requests read the ``search`` and ``alpha`` form fields:

    * an empty or missing ``search`` flashes a prompt and re-renders the
      initial page;
    * an empty or missing ``alpha`` falls back to ``_DEFAULT_ALPHA``;
    * a non-numeric ``alpha`` flashes an error and re-renders the initial
      page instead of forwarding a bad value to the backend;
    * otherwise the query is passed to ``hybrid_query`` and its return value
      is handed to the template as ``results=[{'resultArray': ...}]``.

    Returns:
        The rendered ``index.html`` response.
    """
    if request.method != 'POST':
        return render_template('index.html', initialPage=True)

    # .get() keeps a missing field on the same path as an empty one rather
    # than aborting the request with a 400.
    search = request.form.get('search', '')
    raw_alpha = request.form.get('alpha', '')

    if not search:
        flash('Please enter your search')
        return render_template('index.html', initialPage=True)

    # Form values arrive as strings; convert so that alpha has the same
    # numeric type as the default when it reaches the query payload.
    try:
        alpha = float(raw_alpha) if raw_alpha else _DEFAULT_ALPHA
    except ValueError:
        flash('Alpha must be a number')
        return render_template('index.html', initialPage=True)

    results = hybrid_query(search, _DEFAULT_TOP_K, alpha)
    pinecone_results = [{'resultArray': results}]
    return render_template('index.html', results=pinecone_results)
# Helpers
# Hugging Face id of the sentence-transformer checkpoint to load
_DENSE_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
# Instantiate the model once at import time; it is reused for every query
model = SentenceTransformer(_DENSE_MODEL_NAME)
# Create a tokenizer
class Tokenizer:
    """Turn text into a mapping of hashed, stemmed tokens to their counts."""

    def __init__(self):
        # English Snowball stemmer, created once and reused by encode()
        self.stemmer = SnowballStemmer('english')

    def encode(self, text):
        """Return ``{token_hash: occurrences}`` for ``text``.

        The text is split with ``word_tokenize``, each token is stemmed, and
        the stem is hashed with ``mmh3.hash(..., signed=False)``. The hash
        values become the keys; the values are how many times each hash
        occurred.
        """
        stem = self.stemmer.stem
        counts = Counter(
            mmh3.hash(stem(token), signed=False)
            for token in word_tokenize(text)
        )
        return dict(counts)


# Shared module-level instance
tokenizer = Tokenizer()
# Parse query and perform search
def hybrid_query(question, top_k, alpha):
    """Build a hybrid query for ``question`` and send it to the index.

    Args:
        question: Query text. It is passed through ``str()`` before sparse
            encoding and handed to the dense model as a one-item list.
        top_k: Sent unchanged as the ``topK`` field of the request.
        alpha: Sent unchanged as the ``alpha`` field of the request.

    Returns:
        Whatever ``pinecone.query`` returns for the assembled request.
    """
    # Sparse side: hashed-token counts from the module-level tokenizer
    sparse_counts = tokenizer.encode(str(question))
    # Dense side: encode a one-item batch and convert the result with .tolist()
    dense_batch = model.encode([question]).tolist()

    # Request payload; metadata is always requested alongside the matches
    return pinecone.query({
        "topK": top_k,
        "vector": dense_batch,
        "sparseVector": sparse_counts,
        "alpha": alpha,
        "includeMetadata": True,
    })