diff --git a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java index 876b037ad3c7..c85f51567ba7 100644 --- a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java +++ b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java @@ -702,7 +702,7 @@ private static float cosineBody(ByteVectorLoader a, ByteVectorLoader b) { // only vectorize if we'll at least enter the loop a single time, and we have at least 128-bit // vectors (256-bit on intel to dodge performance landmines) if (a.length() >= 16 && PanamaVectorConstants.HAS_FAST_INTEGER_VECTORS) { - final float[] ret; + final int[] ret; if (VECTOR_BITSIZE >= 512) { i += BYTE_SPECIES.loopBound(a.length()); ret = cosineBody512(a, b, i); @@ -731,7 +731,7 @@ private static float cosineBody(ByteVectorLoader a, ByteVectorLoader b) { } /** vectorized cosine body (512 bit vectors) */ - private static float[] cosineBody512(ByteVectorLoader a, ByteVectorLoader b, int limit) { + private static int[] cosineBody512(ByteVectorLoader a, ByteVectorLoader b, int limit) { IntVector accSum = IntVector.zero(INT_SPECIES); IntVector accNorm1 = IntVector.zero(INT_SPECIES); IntVector accNorm2 = IntVector.zero(INT_SPECIES); @@ -755,13 +755,13 @@ private static float[] cosineBody512(ByteVectorLoader a, ByteVectorLoader b, int accSum = accSum.add(prod32); } // reduce - return new float[] { + return new int[] { accSum.reduceLanes(ADD), accNorm1.reduceLanes(ADD), accNorm2.reduceLanes(ADD) }; } /** vectorized cosine body (256 bit vectors) */ - private static float[] cosineBody256(ByteVectorLoader a, ByteVectorLoader b, int limit) { + private static int[] cosineBody256(ByteVectorLoader a, ByteVectorLoader b, int limit) { IntVector accSum = IntVector.zero(IntVector.SPECIES_256); IntVector accNorm1 = IntVector.zero(IntVector.SPECIES_256); IntVector accNorm2 = IntVector.zero(IntVector.SPECIES_256); @@ -780,13 +780,13 @@ private static float[] cosineBody256(ByteVectorLoader a, ByteVectorLoader b, int accSum = accSum.add(prod32); } // reduce - return new float[] { + return new int[] { accSum.reduceLanes(ADD), accNorm1.reduceLanes(ADD), accNorm2.reduceLanes(ADD) }; } /** vectorized cosine body (128 bit vectors) */ - private static float[] cosineBody128(ByteVectorLoader a, ByteVectorLoader b, int limit) { + private static int[] cosineBody128(ByteVectorLoader a, ByteVectorLoader b, int limit) { IntVector accSum = IntVector.zero(IntVector.SPECIES_128); IntVector accNorm1 = IntVector.zero(IntVector.SPECIES_128); IntVector accNorm2 = IntVector.zero(IntVector.SPECIES_128); @@ -807,7 +807,7 @@ private static float[] cosineBody128(ByteVectorLoader a, ByteVectorLoader b, int accSum = accSum.add(prod16.convertShape(S2I, IntVector.SPECIES_128, 0)); } // reduce - return new float[] { + return new int[] { accSum.reduceLanes(ADD), accNorm1.reduceLanes(ADD), accNorm2.reduceLanes(ADD) }; }