apache
diff --git a/.github/workflows/actions.yml
Lines changed: 3 additions & 3 deletions b/.github/workflows/actions.yml
Lines changed: 3 additions & 3 deletions
diff --git a/.github/workflows/codeql.yml
Lines changed: 2 additions & 2 deletions b/.github/workflows/codeql.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/dependency-submission.yml
Lines changed: 1 addition & 1 deletion b/.github/workflows/dependency-submission.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/mark-stale-PRs.yml
Lines changed: 1 addition & 1 deletion b/.github/workflows/mark-stale-PRs.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/run-checks-python.yml
Lines changed: 1 addition & 1 deletion b/.github/workflows/run-checks-python.yml
Lines changed: 1 addition & 1 deletion
diff --git a/dev-tools/scripts/pyproject.toml
Lines changed: 0 additions & 3 deletions b/dev-tools/scripts/pyproject.toml
Lines changed: 0 additions & 3 deletions
diff --git a/dev-tools/scripts/requirements.txt
Lines changed: 5 additions & 5 deletions b/dev-tools/scripts/requirements.txt
Lines changed: 5 additions & 5 deletions
diff --git a/lucene/CHANGES.txt
Lines changed: 5 additions & 0 deletions b/lucene/CHANGES.txt
Lines changed: 5 additions & 0 deletions
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorScorerUtil.java
Lines changed: 3 additions & 1 deletion b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorScorerUtil.java
Lines changed: 3 additions & 1 deletion
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java
Lines changed: 53 additions & 42 deletions b/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java
Lines changed: 53 additions & 42 deletions
@@ -24,7 +24,7 @@ jobs:
         with:
           persist-credentials: false
       - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@b75a909f75acd358c2196fb9a5f1299a9a8868a4 # v6.7.0
+        uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6.8.0
       - name: Check workflows with actionlint
         run: uvx --from actionlint-py actionlint -color
   zizmor:
@@ -38,15 +38,15 @@ jobs:
         with:
           persist-credentials: false
       - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@b75a909f75acd358c2196fb9a5f1299a9a8868a4 # v6.7.0
+        uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6.8.0
       - name: Run zizmor
         run: uvx zizmor --pedantic --format=sarif . > results.sarif
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           # TODO: offline checks only to avoid any rate-limiting issues, maybe enable nightly?
           ZIZMOR_OFFLINE: true
       - name: Upload SARIF file
-        uses: github/codeql-action/upload-sarif@192325c86100d080feab897ff886c34abd4c83a3 # v3.30.3
+        uses: github/codeql-action/upload-sarif@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 # v3.30.5
         with:
           sarif_file: results.sarif
           category: zizmor
@@ -42,14 +42,14 @@ jobs:
 
       # Initializes the CodeQL tools for scanning.
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@192325c86100d080feab897ff886c34abd4c83a3 # v3.30.3
+        uses: github/codeql-action/init@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 # v3.30.5
         with:
           languages: ${{ matrix.language }}
           build-mode: ${{ matrix.build-mode }}
           queries: security-extended
           config-file: ./.github/codeql-config.yml
 
       - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@192325c86100d080feab897ff886c34abd4c83a3 # v3.30.3
+        uses: github/codeql-action/analyze@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 # v3.30.5
         with:
           category: "/language:${{ matrix.language }}"
@@ -27,7 +27,7 @@ jobs:
         uses: ./.github/actions/prepare-for-build
 
       - name: Generate and submit dependency graph
-        uses: gradle/actions/dependency-submission@748248ddd2a24f49513d8f472f81c3a07d4d50e1 # v4.4.4
+        uses: gradle/actions/dependency-submission@4d9f0ba0025fe599b4ebab900eb7f3a1d93ef4c2 # v5.0.0
         env:
           DEPENDENCY_GRAPH_INCLUDE_CONFIGURATIONS: "(?i)(^|:)(compileClasspath|runtimeClasspath|testCompileClasspath|testRuntimeClasspath)$"
           DEPENDENCY_GRAPH_EXCLUDE_CONFIGURATIONS: "(?i)(^|:)(classpath|.*PluginClasspath|kotlinCompilerClasspath|kaptClasspath|annotationProcessor|detachedConfiguration.*)$"
 
@@ -22,7 +22,7 @@ jobs:
 
     steps:
       - name: Run stale PR action
-        uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
+        uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0
         with:
           repo-token: ${{ secrets.GITHUB_TOKEN }}
 
 
@@ -34,7 +34,7 @@ jobs:
           persist-credentials: false
 
       - name: Setup Python
-        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
         with:
           python-version: "3.12.6"
 
 
@@ -22,9 +22,6 @@ reportUnnecessaryComparison = "hint"         # Requires cleaning up some dead co
 reportUntypedNamedTuple = "hint"             # Requires moving to different type of named tuple
 reportUnusedCallResult = "none"              # Forces you to assign unused retvals to '_', very noisy.
 
-# Intentionally disabled because it slows pyright by 2x
-reportShadowedImports = "none"       # Extremely slow check
-
 [tool.ruff]
 target-version = "py312"
 line-length = 200
 
@@ -1,18 +1,18 @@
 # jinja template processing of releaseWizard.yaml
 Jinja2==3.1.6
 # parsing and processing of releaseWizard.yaml
-PyYAML==6.0.2
+PyYAML==6.0.3
 # international holidays in releaseWizard
-holidays==0.80
+holidays==0.81
 # calendar processing in releaseWizard
 ics==0.7.2
 # terminal processing in releaseWizard
 console-menu==0.8.0
 # pull request processing in githubPRs
-PyGithub==2.7.0
+PyGithub==2.8.1
 # JIRA processing in githubPRs
 jira==3.8.0
 # type-checking in "make lint"
-basedpyright==1.31.4
+basedpyright==1.31.6
 # linting in "make lint"
-ruff==0.12.11
+ruff==0.13.2
@@ -156,6 +156,11 @@ New Features
   `Lucene104HnswScalarQuantizedVectorsFormat` replaces the now legacy `Lucene99HnswScalarQuantizedVectorsFormat`
    (Trevor McCulloch)
 
+ * GITHUB#15271: Extend `Lucene104ScalarQuantizedVectorsFormat` and `Lucene104HnswScalarQuantizedVectorsFormat` to
+   allow asymmetric quantization. Initially, only single-bit index vectors with 4-bit queries are supported. This
+   replaces the now legacy `Lucene102HnswBinaryQuantizedVectorsFormat` and `Lucene102BinaryQuantizedVectorsFormat`.
+    (Ben Trent)
+
 Improvements
 ---------------------
 * GITHUB#15148: Add support for uint8 distance and allow 8 bit scalar quantization (Trevor McCulloch)
 
@@ -17,6 +17,7 @@
 
 package org.apache.lucene.codecs.hnsw;
 
+import org.apache.lucene.codecs.lucene104.AsymmetricScalarQuantizeFlatVectorsScorer;
 import org.apache.lucene.internal.vectorization.VectorizationProvider;
 
 /**
@@ -47,7 +48,8 @@ public static FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() {
    * retrieved through this method may be optimized on certain platforms. Otherwise, a
    * DefaultFlatVectorScorer is returned.
    */
-  public static FlatVectorsScorer getLucene104ScalarQuantizedFlatVectorsScorer() {
+  public static AsymmetricScalarQuantizeFlatVectorsScorer
+      getLucene104ScalarQuantizedFlatVectorsScorer() {
     return IMPL.getLucene104ScalarQuantizedVectorsScorer();
   }
 }
@@ -16,9 +16,10 @@
  */
 package org.apache.lucene.codecs.lucene102;
 
-import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.INDEX_BITS;
 import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.QUERY_BITS;
 import static org.apache.lucene.index.VectorSimilarityFunction.COSINE;
+import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN;
+import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT;
 import static org.apache.lucene.util.quantization.OptimizedScalarQuantizer.transposeHalfByte;
 
 import java.io.IOException;
@@ -30,13 +31,13 @@
 import org.apache.lucene.util.hnsw.RandomVectorScorer;
 import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
 import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
-import org.apache.lucene.util.quantization.OptimizedScalarQuantizedVectorSimilarity;
 import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
 import org.apache.lucene.util.quantization.OptimizedScalarQuantizer.QuantizationResult;
 
 /** Vector scorer over binarized vector values */
 public class Lucene102BinaryFlatVectorsScorer implements FlatVectorsScorer {
   private final FlatVectorsScorer nonQuantizedDelegate;
+  private static final float FOUR_BIT_SCALE = 1f / ((1 << 4) - 1);
 
   public Lucene102BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) {
     this.nonQuantizedDelegate = nonQuantizedDelegate;
@@ -72,20 +73,10 @@ public RandomVectorScorer getRandomVectorScorer(
           quantizer.scalarQuantize(target, initial, (byte) 4, centroid);
       transposeHalfByte(initial, quantized);
       return new RandomVectorScorer.AbstractRandomVectorScorer(binarizedVectors) {
-        private final OptimizedScalarQuantizedVectorSimilarity similarity =
-            new OptimizedScalarQuantizedVectorSimilarity(
-                similarityFunction,
-                binarizedVectors.dimension(),
-                binarizedVectors.getCentroidDP(),
-                QUERY_BITS,
-                INDEX_BITS);
-
         @Override
         public float score(int node) throws IOException {
-          var indexVector = binarizedVectors.vectorValue(node);
-          var indexCorrections = binarizedVectors.getCorrectiveTerms(node);
-          float dotProduct = VectorUtil.int4BitDotProduct(quantized, indexVector);
-          return similarity.score(dotProduct, queryCorrections, indexCorrections);
+          return quantizedScore(
+              quantized, queryCorrections, binarizedVectors, node, similarityFunction);
         }
       };
     }
@@ -102,8 +93,7 @@ public RandomVectorScorer getRandomVectorScorer(
   RandomVectorScorerSupplier getRandomVectorScorerSupplier(
       VectorSimilarityFunction similarityFunction,
       Lucene102BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues scoringVectors,
-      BinarizedByteVectorValues targetVectors)
-      throws IOException {
+      BinarizedByteVectorValues targetVectors) {
     return new BinarizedRandomVectorScorerSupplier(
         scoringVectors, targetVectors, similarityFunction);
   }
@@ -118,31 +108,15 @@ static class BinarizedRandomVectorScorerSupplier implements RandomVectorScorerSu
     private final Lucene102BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues
         queryVectors;
     private final BinarizedByteVectorValues targetVectors;
-    private final OptimizedScalarQuantizedVectorSimilarity similarity;
-
-    BinarizedRandomVectorScorerSupplier(
-        Lucene102BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues queryVectors,
-        BinarizedByteVectorValues targetVectors,
-        VectorSimilarityFunction similarityFunction)
-        throws IOException {
-      this.queryVectors = queryVectors;
-      this.targetVectors = targetVectors;
-      this.similarity =
-          new OptimizedScalarQuantizedVectorSimilarity(
-              similarityFunction,
-              targetVectors.dimension(),
-              targetVectors.getCentroidDP(),
-              QUERY_BITS,
-              INDEX_BITS);
-    }
+    private final VectorSimilarityFunction similarityFunction;
 
     BinarizedRandomVectorScorerSupplier(
         Lucene102BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues queryVectors,
         BinarizedByteVectorValues targetVectors,
-        OptimizedScalarQuantizedVectorSimilarity similarity) {
+        VectorSimilarityFunction similarityFunction) {
       this.queryVectors = queryVectors;
       this.targetVectors = targetVectors;
-      this.similarity = similarity;
+      this.similarityFunction = similarityFunction;
     }
 
     @Override
@@ -165,20 +139,57 @@ public float score(int node) throws IOException {
           if (vector == null || queryCorrections == null) {
             throw new IllegalStateException("setScoringOrdinal was not called");
           }
-          var indexVector = targetVectors.vectorValue(node);
-          var indexCorrections = targetVectors.getCorrectiveTerms(node);
-          return similarity.score(
-              (float) VectorUtil.int4BitDotProduct(vector, indexVector),
-              queryCorrections,
-              indexCorrections);
+          return quantizedScore(vector, queryCorrections, targetVectors, node, similarityFunction);
         }
       };
     }
 
     @Override
     public RandomVectorScorerSupplier copy() throws IOException {
       return new BinarizedRandomVectorScorerSupplier(
-          queryVectors.copy(), targetVectors.copy(), similarity);
+          queryVectors.copy(), targetVectors.copy(), similarityFunction);
+    }
+  }
+
+  static float quantizedScore(
+      byte[] quantizedQuery,
+      OptimizedScalarQuantizer.QuantizationResult queryCorrections,
+      BinarizedByteVectorValues targetVectors,
+      int targetOrd,
+      VectorSimilarityFunction similarityFunction)
+      throws IOException {
+    byte[] binaryCode = targetVectors.vectorValue(targetOrd);
+    float qcDist = VectorUtil.int4BitDotProduct(quantizedQuery, binaryCode);
+    OptimizedScalarQuantizer.QuantizationResult indexCorrections =
+        targetVectors.getCorrectiveTerms(targetOrd);
+    float x1 = indexCorrections.quantizedComponentSum();
+    float ax = indexCorrections.lowerInterval();
+    // Index vectors are single-bit, so their scale factor is 1 and `lx` needs no scaling
+    float lx = indexCorrections.upperInterval() - ax;
+    float ay = queryCorrections.lowerInterval();
+    float ly = (queryCorrections.upperInterval() - ay) * FOUR_BIT_SCALE;
+    float y1 = queryCorrections.quantizedComponentSum();
+    float score =
+        ax * ay * targetVectors.dimension() + ay * lx * x1 + ax * ly * y1 + lx * ly * qcDist;
+    // For euclidean, we need to invert the score and apply the additional correction, which is
+    // assumed to be the squared l2norm of the centroid centered vectors.
+    if (similarityFunction == EUCLIDEAN) {
+      score =
+          queryCorrections.additionalCorrection()
+              + indexCorrections.additionalCorrection()
+              - 2 * score;
+      return Math.max(1 / (1f + score), 0);
+    } else {
+      // For cosine and max inner product, we need to apply the additional correction, which is
+      // assumed to be the non-centered dot-product between the vector and the centroid
+      score +=
+          queryCorrections.additionalCorrection()
+              + indexCorrections.additionalCorrection()
+              - targetVectors.getCentroidDP();
+      if (similarityFunction == MAXIMUM_INNER_PRODUCT) {
+        return VectorUtil.scaleMaxInnerProductScore(score);
+      }
+      return Math.max((1f + score) / 2f, 0);
     }
   }
 }
Original file line number	Diff line number	Diff line change
`@@ -17,6 +17,7 @@`
`17`	`17`
`18`	`18`	`package org.apache.lucene.codecs.hnsw;`
`19`	`19`
	`20`	`+import org.apache.lucene.codecs.lucene104.AsymmetricScalarQuantizeFlatVectorsScorer;`
`20`	`21`	`import org.apache.lucene.internal.vectorization.VectorizationProvider;`
`21`	`22`
`22`	`23`	`/**`
`@@ -47,7 +48,8 @@ public static FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() {`
`47`	`48`	`* retrieved through this method may be optimized on certain platforms. Otherwise, a`
`48`	`49`	`* DefaultFlatVectorScorer is returned.`
`49`	`50`	`*/`
`50`		`- public static FlatVectorsScorer getLucene104ScalarQuantizedFlatVectorsScorer() {`
	`51`	`+ public static AsymmetricScalarQuantizeFlatVectorsScorer`
	`52`	`+ getLucene104ScalarQuantizedFlatVectorsScorer() {`
`51`	`53`	`return IMPL.getLucene104ScalarQuantizedVectorsScorer();`
`52`	`54`	`}`
`53`	`55`	`}`