Skip to content

Commit 9a6f6dd

Browse files
authored
Merge pull request #155 from Azure-Samples/cosineoperator
Update HNSW indexes to use cosine operator
2 parents 3b938d6 + 909b833 commit 9a6f6dd

File tree

2 files changed

+14
-7
lines changed

2 files changed

+14
-7
lines changed

Diff for: .github/workflows/app-tests.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ jobs:
7373
if: matrix.os == 'ubuntu-latest'
7474
run: |
7575
sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y
76-
sudo apt-get install postgresql-14-pgvector
76+
sudo apt-get install postgresql-16-pgvector
7777
sudo systemctl start postgresql
7878
sudo -u postgres psql -c "ALTER USER ${{ env.POSTGRES_USERNAME }} PASSWORD '${{ env.POSTGRES_PASSWORD }}'"
7979
sudo -u postgres psql -c 'CREATE EXTENSION vector'

Diff for: src/backend/fastapi_app/postgres_models.py

+13-6
Original file line numberDiff line numberDiff line change
@@ -39,23 +39,30 @@ def to_str_for_embedding(self):
3939
return f"Name: {self.name} Description: {self.description} Type: {self.type}"
4040

4141

42-
# Define HNSW index to support vector similarity search
43-
# Use the vector_ip_ops access method (inner product) since these embeddings are normalized
42+
"""
43+
**Define HNSW index to support vector similarity search**
44+
45+
We use the vector_cosine_ops access method (cosine distance)
46+
since it works for both normalized and non-normalized vector embeddings.
47+
If you know your embeddings are normalized,
48+
you can switch to inner product for potentially better performance.
49+
The index operator should match the operator used in queries.
50+
"""
4451

4552
table_name = Item.__tablename__
4653

4754
index_ada002 = Index(
48-
"hnsw_index_for_innerproduct_{table_name}_embedding_ada002",
55+
f"hnsw_index_for_cosine_{table_name}_embedding_ada002",
4956
Item.embedding_ada002,
5057
postgresql_using="hnsw",
5158
postgresql_with={"m": 16, "ef_construction": 64},
52-
postgresql_ops={"embedding_ada002": "vector_ip_ops"},
59+
postgresql_ops={"embedding_ada002": "vector_cosine_ops"},
5360
)
5461

5562
index_nomic = Index(
56-
f"hnsw_index_for_innerproduct_{table_name}_embedding_nomic",
63+
f"hnsw_index_for_cosine_{table_name}_embedding_nomic",
5764
Item.embedding_nomic,
5865
postgresql_using="hnsw",
5966
postgresql_with={"m": 16, "ef_construction": 64},
60-
postgresql_ops={"embedding_nomic": "vector_ip_ops"},
67+
postgresql_ops={"embedding_nomic": "vector_cosine_ops"},
6168
)

0 commit comments

Comments
 (0)