apache
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104HnswScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104HnswScalarQuantizedVectorsFormat.java
Lines changed: 2 additions & 24 deletions
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java
Lines changed: 17 additions & 25 deletions
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java
Lines changed: 55 additions & 37 deletions
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java
Lines changed: 1 addition & 14 deletions
@@ -86,6 +86,7 @@ public Lucene104HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth) {
   /**
    * Constructs a format using the given graph construction parameters and scalar quantization.
    *
+   * @param encoding the quantization encoding used to encode the vectors
    * @param maxConn the maximum number of connections to a node in the HNSW graph
    * @param beamWidth the size of the queue maintained during graph construction.
    * @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
@@ -99,31 +100,8 @@ public Lucene104HnswScalarQuantizedVectorsFormat(
       int beamWidth,
       int numMergeWorkers,
       ExecutorService mergeExec) {
-    this(encoding, encoding, maxConn, beamWidth, numMergeWorkers, mergeExec);
-  }
-
-  /**
-   * Constructs a format using the given graph construction parameters and scalar quantization.
-   *
-   * @param encoding the encoding used to encode the indexed vectors
-   * @param queryEncoding the encoding used to encode the query vectors. This may be different from
-   *     the encoding used to encode the indexed vectors.
-   * @param maxConn the maximum number of connections to a node in the HNSW graph
-   * @param beamWidth the size of the queue maintained during graph construction.
-   * @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
-   *     larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
-   * @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
-   *     generated by this format to do the merge
-   */
-  public Lucene104HnswScalarQuantizedVectorsFormat(
-      ScalarEncoding encoding,
-      ScalarEncoding queryEncoding,
-      int maxConn,
-      int beamWidth,
-      int numMergeWorkers,
-      ExecutorService mergeExec) {
     super(NAME);
-    flatVectorsFormat = new Lucene104ScalarQuantizedVectorsFormat(encoding, queryEncoding);
+    flatVectorsFormat = new Lucene104ScalarQuantizedVectorsFormat(encoding);
     if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
       throw new IllegalArgumentException(
           "maxConn must be positive and less than or equal to "
 
@@ -64,19 +64,14 @@ public RandomVectorScorer getRandomVectorScorer(
     if (vectorValues instanceof QuantizedByteVectorValues qv) {
       checkDimensions(target.length, qv.dimension());
       OptimizedScalarQuantizer quantizer = qv.getQuantizer();
-      byte[] scratch = new byte[qv.discretizedDimension()];
+      Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding scalarEncoding = qv.getScalarEncoding();
+      byte[] scratch = new byte[scalarEncoding.getDiscreteDimensions(qv.dimension())];
       final byte[] targetQuantized;
-      if (qv.getScalarEncoding() == qv.getQueryScalarEncoding()) {
-        assert qv.getScalarEncoding()
-            != Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SINGLE_BIT;
+      if (scalarEncoding.isAsymmetric() == false) {
         targetQuantized = scratch;
       } else {
         // This is asymmetric quantization, we will pack the vector
-        assert qv.getScalarEncoding()
-            == Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SINGLE_BIT;
-        assert qv.getQueryScalarEncoding()
-            == Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.PACKED_NIBBLE;
-        targetQuantized = new byte[qv.getQueryScalarEncoding().getPackedLength(scratch.length)];
+        targetQuantized = new byte[scalarEncoding.getQueryPackedLength(scratch.length)];
       }
       // We make a copy as the quantization process mutates the input
       float[] copy = ArrayUtil.copyOfSubArray(target, 0, target.length);
@@ -86,12 +81,10 @@ public RandomVectorScorer getRandomVectorScorer(
       target = copy;
       var targetCorrectiveTerms =
           quantizer.scalarQuantize(
-              target, scratch, qv.getQueryScalarEncoding().getBits(), qv.getCentroid());
-      if (qv.getScalarEncoding() != qv.getQueryScalarEncoding()) {
-        assert qv.getScalarEncoding()
-            == Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SINGLE_BIT;
-        assert qv.getQueryScalarEncoding()
-            == Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.PACKED_NIBBLE;
+              target, scratch, scalarEncoding.getQueryBits(), qv.getCentroid());
+      // for single bit query nibble, we need to transpose the nibbles for fast scoring comparisons
+      if (scalarEncoding
+          == Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SINGLE_BIT_QUERY_NIBBLE) {
         OptimizedScalarQuantizer.transposeHalfByte(scratch, targetQuantized);
       }
       return new RandomVectorScorer.AbstractRandomVectorScorer(qv) {
@@ -137,9 +130,7 @@ static class AsymmetricQuantizedRandomVectorScorerSupplier implements RandomVect
         QuantizedByteVectorValues queryVectors,
         QuantizedByteVectorValues targetVectors,
         VectorSimilarityFunction similarityFunction) {
-      assert targetVectors.getQueryScalarEncoding() != targetVectors.getScalarEncoding();
-      assert queryVectors.getScalarEncoding() == targetVectors.getQueryScalarEncoding();
-      assert queryVectors.getScalarEncoding() == queryVectors.getQueryScalarEncoding();
+      assert targetVectors.getScalarEncoding().isAsymmetric();
       this.queryVectors = queryVectors;
       this.targetVectors = targetVectors;
       this.similarityFunction = similarityFunction;
@@ -155,15 +146,16 @@ public UpdateableRandomVectorScorer scorer() throws IOException {
 
         @Override
         public void setScoringOrdinal(int node) throws IOException {
-          queryCorrections = queryVectors.getCorrectiveTerms(node);
           vector = queryVectors.vectorValue(node);
+          queryCorrections = queryVectors.getCorrectiveTerms(node);
         }
 
         @Override
         public float score(int node) throws IOException {
           if (vector == null || queryCorrections == null) {
             throw new IllegalStateException("setScoringOrdinal was not called");
           }
+
           return quantizedScore(vector, queryCorrections, targetVectors, node, similarityFunction);
         }
       };
@@ -184,7 +176,7 @@ private static final class ScalarQuantizedVectorScorerSupplier
 
     public ScalarQuantizedVectorScorerSupplier(
         QuantizedByteVectorValues values, VectorSimilarityFunction similarity) throws IOException {
-      assert values.getQueryScalarEncoding() == values.getScalarEncoding();
+      assert values.getScalarEncoding().isAsymmetric() == false;
       this.targetValues = values.copy();
       this.values = values;
       this.similarity = similarity;
@@ -212,9 +204,9 @@ public void setScoringOrdinal(int node) throws IOException {
               }
               OffHeapScalarQuantizedVectorValues.unpackNibbles(rawTargetVector, targetVector);
             }
-            case SINGLE_BIT -> {
+            case SINGLE_BIT_QUERY_NIBBLE -> {
               throw new IllegalStateException(
-                  "SINGLE_BIT encoding is not supported for symmetric quantization");
+                  "SINGLE_BIT_QUERY_NIBBLE encoding is not supported for symmetric quantization");
             }
           }
           targetCorrectiveTerms = targetValues.getCorrectiveTerms(node);
@@ -248,18 +240,18 @@ private static float quantizedScore(
       VectorSimilarityFunction similarityFunction)
       throws IOException {
     var scalarEncoding = targetVectors.getScalarEncoding();
-    var queryScalarEncoding = targetVectors.getQueryScalarEncoding();
     byte[] quantizedDoc = targetVectors.vectorValue(targetOrd);
     float qcDist =
         switch (scalarEncoding) {
           case UNSIGNED_BYTE -> VectorUtil.uint8DotProduct(quantizedQuery, quantizedDoc);
           case SEVEN_BIT -> VectorUtil.dotProduct(quantizedQuery, quantizedDoc);
           case PACKED_NIBBLE -> VectorUtil.int4DotProductSinglePacked(quantizedQuery, quantizedDoc);
-          case SINGLE_BIT -> VectorUtil.int4BitDotProduct(quantizedQuery, quantizedDoc);
+          case SINGLE_BIT_QUERY_NIBBLE ->
+              VectorUtil.int4BitDotProduct(quantizedQuery, quantizedDoc);
         };
     OptimizedScalarQuantizer.QuantizationResult indexCorrections =
         targetVectors.getCorrectiveTerms(targetOrd);
-    float queryScale = SCALE_LUT[queryScalarEncoding.getBits() - 1];
+    float queryScale = SCALE_LUT[scalarEncoding.getQueryBits() - 1];
     float scale = SCALE_LUT[scalarEncoding.getBits() - 1];
     float x1 = indexCorrections.quantizedComponentSum();
     float ax = indexCorrections.lowerInterval();
 
@@ -79,7 +79,6 @@
  * <ul>
  *   <li><b>int</b> the field number
  *   <li><b>int</b> the vector encoding ordinal
- *   <li><b>int</b> the query encoding ordinal
  *   <li><b>int</b> the vector similarity ordinal
  *   <li><b>vint</b> the vector dimensions
  *   <li><b>vlong</b> the offset to the vector data in the .veq file
@@ -110,7 +109,6 @@ public class Lucene104ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
       new Lucene104ScalarQuantizedVectorScorer(FlatVectorScorerUtil.getLucene99FlatVectorsScorer());
 
   private final ScalarEncoding encoding;
-  private final ScalarEncoding queryEncoding;
 
   /**
    * Allowed encodings for scalar quantization.
@@ -132,14 +130,13 @@ public enum ScalarEncoding {
      */
     SEVEN_BIT(2, (byte) 7, 8),
     /**
-     * Each dimension is quantized to a single bit and packed into bytes.
+     * Each dimension is quantized to a single bit and packed into bytes. During query time, the
+     * query vector is quantized to 4 bits per dimension.
      *
      * <p>This is the most space efficient encoding, and will produce an index 8x smaller than
-     * {@link #UNSIGNED_BYTE}. However, this comes at the cost of accuracy. This encoding is
-     * recommended for use when the number of dimensions is high (e.g. &gt; 128) and with an
-     * asymmetric quantization scheme where query vectors are quantized to 4 bits.
+     * {@link #UNSIGNED_BYTE}. However, this comes at the cost of accuracy.
      */
-    SINGLE_BIT(3, (byte) 1, 1);
+    SINGLE_BIT_QUERY_NIBBLE(3, (byte) 1, 1, (byte) 4, 4);
 
     public static ScalarEncoding fromNumBits(int bits) {
       for (ScalarEncoding encoding : values()) {
@@ -153,13 +150,27 @@ public static ScalarEncoding fromNumBits(int bits) {
     /** The number used to identify this encoding on the wire, rather than relying on ordinal. */
     private final int wireNumber;
 
-    private final byte bits;
-    private final int bitsPerDim;
+    private final byte bits, queryBits;
+    private final int bitsPerDim, queryBitsPerDim;
 
     ScalarEncoding(int wireNumber, byte bits, int bitsPerDim) {
       this.wireNumber = wireNumber;
       this.bits = bits;
+      this.queryBits = bits;
       this.bitsPerDim = bitsPerDim;
+      this.queryBitsPerDim = bitsPerDim;
+    }
+
+    ScalarEncoding(int wireNumber, byte bits, int bitsPerDim, byte queryBits, int queryBitsPerDim) {
+      this.wireNumber = wireNumber;
+      this.bits = bits;
+      this.queryBits = queryBits;
+      this.bitsPerDim = bitsPerDim;
+      this.queryBitsPerDim = queryBitsPerDim;
+    }
+
+    boolean isAsymmetric() {
+      return bits != queryBits; // true when queries use a different bit width than indexed vectors
     }
 
     int getWireNumber() {
@@ -171,20 +182,48 @@ public byte getBits() {
       return bits;
     }
 
+    public byte getQueryBits() {
+      return queryBits; // same as bits unless the encoding declares separate query settings
+    }
+
     /** Return the number of dimensions rounded up to fit into whole bytes. */
     public int getDiscreteDimensions(int dimensions) {
-      int totalBits = dimensions * bitsPerDim;
-      return (totalBits + 7) / 8 * 8 / bitsPerDim;
+      if (queryBits == bits) { // symmetric: one bit width, round up for that layout only
+        int totalBits = dimensions * bitsPerDim;
+        return (totalBits + 7) / 8 * 8 / bitsPerDim;
+      }
+      int queryDiscretized = (dimensions * queryBitsPerDim + 7) / 8 * 8 / queryBitsPerDim;
+      int docDiscretized = (dimensions * bitsPerDim + 7) / 8 * 8 / bitsPerDim;
+      int maxDiscretized = Math.max(queryDiscretized, docDiscretized); // must suit both layouts
+      assert maxDiscretized % (8.0 / queryBitsPerDim) == 0
+          : "bad discretized=" + maxDiscretized + " for dim=" + dimensions;
+      assert maxDiscretized % (8.0 / bitsPerDim) == 0
+          : "bad discretized=" + maxDiscretized + " for dim=" + dimensions;
+      return maxDiscretized;
     }
 
     /** Return the number of dimensions that can be packed into a single byte. */
-    public int getBitsPerDim() {
+    public int getDocBitsPerDim() {
       return this.bitsPerDim;
     }
 
+    public int getQueryBitsPerDim() {
+      return this.queryBitsPerDim; // same as bitsPerDim unless separate query settings are declared
+    }
+
     /** Return the number of bytes required to store a packed vector of the given dimensions. */
-    public int getPackedLength(int dimensions) {
-      return (dimensions * bitsPerDim + 7) / 8;
+    public int getDocPackedLength(int dimensions) {
+      int discretized = getDiscreteDimensions(dimensions);
+      // size of one packed indexed vector: discretized dims at bitsPerDim bits, in whole bytes
+      int totalBits = discretized * bitsPerDim;
+      return (totalBits + 7) / 8;
+    }
+
+    public int getQueryPackedLength(int dimensions) {
+      int discretized = getDiscreteDimensions(dimensions);
+      // size of one packed query vector: discretized dims at queryBitsPerDim bits, in whole bytes
+      int totalBits = discretized * queryBitsPerDim;
+      return (totalBits + 7) / 8;
     }
 
     /** Returns the encoding for the given wire number, or empty if unknown. */
@@ -203,35 +242,16 @@ public Lucene104ScalarQuantizedVectorsFormat() {
     this(ScalarEncoding.UNSIGNED_BYTE);
   }
 
-  /** Creates a new instance with the chosen symmetric quantization encoding. */
+  /** Creates a new instance with the chosen quantization encoding. */
   public Lucene104ScalarQuantizedVectorsFormat(ScalarEncoding encoding) {
-    this(encoding, encoding);
-  }
-
-  /** Creates a new instance with the chosen asymmetric quantization encoding. */
-  public Lucene104ScalarQuantizedVectorsFormat(
-      ScalarEncoding encoding, ScalarEncoding queryEncoding) {
     super(NAME);
     this.encoding = encoding;
-    this.queryEncoding = queryEncoding;
-    // until we have optimized scorers for various other asymmetric encodings, maybe we only allow 1
-    // bit -> 4 bit
-    // Technically, we should be able to do 2 bit -> 4 bit, and 1, 2 -> 8, and 4 -> 8. But these
-    // will take time to
-    // have optimized scorers, and we don't want users to accidentally use poorly optimized
-    // combinations.
-    if (encoding != queryEncoding) {
-      if (encoding != ScalarEncoding.SINGLE_BIT || queryEncoding != ScalarEncoding.PACKED_NIBBLE) {
-        throw new IllegalArgumentException(
-            "Only SINGLE_BIT -> PACKED_NIBBLE asymmetric encoding is supported");
-      }
-    }
   }
 
   @Override
   public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
     return new Lucene104ScalarQuantizedVectorsWriter(
-        state, encoding, queryEncoding, rawVectorFormat.fieldsWriter(state), scorer);
+        state, encoding, rawVectorFormat.fieldsWriter(state), scorer);
   }
 
   @Override
@@ -251,8 +271,6 @@ public String toString() {
         + NAME
         + ", encoding="
         + encoding
-        + ", queryEncoding="
-        + queryEncoding
         + ", flatVectorScorer="
         + scorer
         + ", rawVectorFormat="
 
@@ -141,7 +141,7 @@ static void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
 
     long numQuantizedVectorBytes =
         Math.multiplyExact(
-            (fieldEntry.scalarEncoding.getPackedLength(dimension)
+            (fieldEntry.scalarEncoding.getDocPackedLength(dimension)
                 + (Float.BYTES * 3)
                 + Integer.BYTES),
             (long) fieldEntry.size);
@@ -173,7 +173,6 @@ public RandomVectorScorer getRandomVectorScorer(String field, float[] target) th
             fi.size,
             new OptimizedScalarQuantizer(fi.similarityFunction),
             fi.scalarEncoding,
-            fi.queryEncoding,
             fi.similarityFunction,
             vectorScorer,
             fi.centroid,
@@ -217,7 +216,6 @@ public FloatVectorValues getFloatVectorValues(String field) throws IOException {
             fi.size,
             new OptimizedScalarQuantizer(fi.similarityFunction),
             fi.scalarEncoding,
-            fi.queryEncoding,
             fi.similarityFunction,
             vectorScorer,
             fi.centroid,
@@ -367,7 +365,6 @@ public org.apache.lucene.util.quantization.QuantizedByteVectorValues getQuantize
             fi.size,
             new OptimizedScalarQuantizer(fi.similarityFunction),
             fi.scalarEncoding,
-            fi.queryEncoding,
             fi.similarityFunction,
             vectorScorer,
             fi.centroid,
@@ -411,7 +408,6 @@ private record FieldEntry(
       long vectorDataLength,
       int size,
       ScalarEncoding scalarEncoding,
-      ScalarEncoding queryEncoding,
       float[] centroid,
       float centroidDP,
       OrdToDocDISIReaderConfiguration ordToDocDISIReaderConfiguration) {
@@ -428,7 +424,6 @@ static FieldEntry create(
       final float[] centroid;
       float centroidDP = 0;
       ScalarEncoding scalarEncoding = ScalarEncoding.UNSIGNED_BYTE;
-      ScalarEncoding queryEncoding = ScalarEncoding.UNSIGNED_BYTE;
       if (size > 0) {
         int wireNumber = input.readVInt();
         scalarEncoding =
@@ -437,13 +432,6 @@ static FieldEntry create(
                     () ->
                         new IllegalStateException(
                             "Could not get ScalarEncoding from wire number: " + wireNumber));
-        int queryWireNumber = input.readVInt();
-        queryEncoding =
-            ScalarEncoding.fromWireNumber(queryWireNumber)
-                .orElseThrow(
-                    () ->
-                        new IllegalStateException(
-                            "Could not get ScalarEncoding from wire number: " + queryWireNumber));
         centroid = new float[dimension];
         input.readFloats(centroid, 0, dimension);
         centroidDP = Float.intBitsToFloat(input.readInt());
@@ -460,7 +448,6 @@ static FieldEntry create(
           vectorDataLength,
           size,
           scalarEncoding,
-          queryEncoding,
           centroid,
           centroidDP,
           conf);