Skip to content

Commit 326de14

Browse files
authored
vector basics; matrixes, serialization and half-precision floating point support (#3677)
- contains a new package called `half`: - contains a modified version of the `Half` class as provided in Half4J. - contains a new package called `linear`: - `RealVector` as well as three implementations `HalfRealVector`, `FloatRealVector`, `DoubleRealVector` in `linear` - operations on vectors - linear operator - `RealMatrix` as dense matrix, various implementations, operations on matrices - `Metrics` as implementation of different kinds of metrics - `FhtKacRotator` to create random orthogonal matrices as needed by rabitq - `QRDecomposition` using Householder reflections to create random orthogonal matrices - contains a new package called `rabitq`: - rabitq implementation (extended version): quantizer and estimator, encodings - `EncodedRealVector` as implementation of `RealVector` as drop in for any `RealVector`
1 parent 29e4331 commit 326de14

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+6661
-1
lines changed

ACKNOWLEDGEMENTS

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,27 @@ Unicode, Inc (ICU4J)
216216
Creative Commons Attribution 4.0 License (GeoNames)
217217

218218
https://creativecommons.org/licenses/by/4.0/
219+
220+
Christian Heina (HALF4J)
221+
222+
Copyright 2023 Christian Heina
223+
224+
Licensed under the Apache License, Version 2.0 (the "License");
225+
you may not use this file except in compliance with the License.
226+
You may obtain a copy of the License at
227+
228+
http://www.apache.org/licenses/LICENSE-2.0
229+
230+
Unless required by applicable law or agreed to in writing, software
231+
distributed under the License is distributed on an "AS IS" BASIS,
232+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
233+
See the License for the specific language governing permissions and
234+
limitations under the License.
235+
236+
Jianyang Gao, Yutong Gou, Yuexuan Xu, Yongyi Yang, Cheng Long, Raymond Chi-Wing Wong,
237+
"Practical and Asymptotically Optimal Quantization of High-Dimensional Vectors in Euclidean Space for
238+
Approximate Nearest Neighbor Search",
239+
SIGMOD 2025, available at https://arxiv.org/abs/2409.09913
240+
241+
Yutong Gou, Jianyang Gao, Yuexuan Xu, Jifan Shi and Zhonghao Yang
242+
https://github.com/VectorDB-NTU/RaBitQ-Library/blob/main/LICENSE

fdb-extensions/fdb-extensions.gradle

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,38 @@ dependencies {
4242
testFixturesAnnotationProcessor(libs.autoService)
4343
}
4444

45+
// Download target for the SIFT-small archive, resolved lazily under the build directory.
def siftSmallFile = layout.buildDirectory.file('downloads/siftsmall.tar.gz')
46+
// Directory under the build directory that the archive contents are unpacked into.
def extractDir = layout.buildDirectory.dir("extracted")
47+
48+
// Task that downloads the siftsmall.tar.gz archive into siftSmallFile; the URL pins a fixed dataset revision.
// NOTE(review): onlyIfModified / tempAndMove / retries are options of the download plugin -- presumably they
// skip the download when the remote file is unchanged, write through a temporary file, and retry up to three
// times; confirm against the plugin documentation.
49+
tasks.register('downloadSiftSmall', de.undercouch.gradle.tasks.download.Download) {
50+
src 'https://huggingface.co/datasets/vecdata/siftsmall/resolve/3106e1b83049c44713b1ce06942d0ab474bbdfb6/siftsmall.tar.gz'
51+
dest siftSmallFile.get().asFile
52+
onlyIfModified true
53+
tempAndMove true
54+
retries 3
55+
}
56+
57+
// Task that unpacks the downloaded gzip-compressed tar archive into extractDir; runs after downloadSiftSmall.
tasks.register('extractSiftSmall', Copy) {
58+
dependsOn 'downloadSiftSmall'
59+
// read the .tar.gz as a file tree: gunzip the resource first, then walk the tar entries
from(tarTree(resources.gzip(siftSmallFile)))
60+
into extractDir
61+
62+
// after copying, print the target directory and every regular file found in it (directories are skipped)
doLast {
63+
println "Extracted files into: ${extractDir.get().asFile}"
64+
fileTree(extractDir).visit { details ->
65+
if (!details.isDirectory()) {
66+
println " - ${details.file}"
67+
}
68+
}
69+
}
70+
}
71+
72+
// Make the test task depend on the extracted data set and declare the extraction directory as a test input.
test {
73+
dependsOn tasks.named('extractSiftSmall')
74+
inputs.dir extractDir
75+
}
76+
4577
publishing {
4678
publications {
4779
library(MavenPublication) {

fdb-extensions/src/main/java/com/apple/foundationdb/async/MoreAsyncUtil.java

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@
2323
import com.apple.foundationdb.annotation.API;
2424
import com.apple.foundationdb.util.LoggableException;
2525
import com.google.common.base.Suppliers;
26+
import com.google.common.collect.Lists;
2627
import com.google.common.util.concurrent.ThreadFactoryBuilder;
2728

2829
import javax.annotation.Nonnull;
2930
import javax.annotation.Nullable;
3031
import java.util.ArrayDeque;
3132
import java.util.ArrayList;
33+
import java.util.Arrays;
3234
import java.util.Collections;
3335
import java.util.Iterator;
3436
import java.util.List;
@@ -42,9 +44,13 @@
4244
import java.util.concurrent.ScheduledThreadPoolExecutor;
4345
import java.util.concurrent.ThreadFactory;
4446
import java.util.concurrent.TimeUnit;
47+
import java.util.concurrent.atomic.AtomicInteger;
48+
import java.util.concurrent.atomic.AtomicReference;
4549
import java.util.function.BiConsumer;
4650
import java.util.function.BiFunction;
4751
import java.util.function.Function;
52+
import java.util.function.IntPredicate;
53+
import java.util.function.IntUnaryOperator;
4854
import java.util.function.Predicate;
4955
import java.util.function.Supplier;
5056

@@ -1051,6 +1057,93 @@ public static CompletableFuture<Void> swallowException(@Nonnull CompletableFutur
10511057
return result;
10521058
}
10531059

1060+
/**
1061+
* Method that provides the functionality of a for loop, however, in an asynchronous way. The result of this method
1062+
* is a {@link CompletableFuture} that represents the result of the last iteration of the loop body.
1063+
* @param startI an integer analogous to the starting value of a loop variable in a for loop
1064+
* @param startU an object of some type {@code U} that represents some initial state that is passed to the loop's
1065+
* initial state
1066+
* @param conditionPredicate a predicate on the loop variable that must be true before the next iteration is
1067+
* entered; analogous to the condition in a for loop
1068+
* @param stepFunction a unary operator used for modifying the loop variable after each iteration
1069+
* @param body a bi-function to be called for each iteration; this function is initially invoked using
1070+
* {@code startI} and {@code startU}; the result of the body is then passed into the next iterator's body
1071+
* together with a new value for the loop variable. In this way callers can access state inside an iteration
1072+
* that was computed in a previous iteration.
1073+
* @param executor the executor
1074+
* @param <U> the type of the result of the body {@link BiFunction}
1075+
* @return a {@link CompletableFuture} containing the result of the last iteration's body invocation.
1076+
*/
1077+
@Nonnull
1078+
public static <U> CompletableFuture<U> forLoop(final int startI, @Nullable final U startU,
1079+
@Nonnull final IntPredicate conditionPredicate,
1080+
@Nonnull final IntUnaryOperator stepFunction,
1081+
@Nonnull final BiFunction<Integer, U, CompletableFuture<U>> body,
1082+
@Nonnull final Executor executor) {
1083+
final AtomicInteger loopVariableAtomic = new AtomicInteger(startI);
1084+
final AtomicReference<U> lastResultAtomic = new AtomicReference<>(startU);
1085+
return whileTrue(() -> {
1086+
final int loopVariable = loopVariableAtomic.get();
1087+
if (!conditionPredicate.test(loopVariable)) {
1088+
return AsyncUtil.READY_FALSE;
1089+
}
1090+
return body.apply(loopVariable, lastResultAtomic.get())
1091+
.thenApply(result -> {
1092+
loopVariableAtomic.set(stepFunction.applyAsInt(loopVariable));
1093+
lastResultAtomic.set(result);
1094+
return true;
1095+
});
1096+
}, executor).thenApply(ignored -> lastResultAtomic.get());
1097+
}
1098+
1099+
/**
1100+
* Method to iterate over some items, for each of which a body is executed asynchronously. The result of each such
1101+
* executed is then collected in a list and returned as a {@link CompletableFuture} over that list.
1102+
* @param items the items to iterate over
1103+
* @param body a function to be called for each item
1104+
* @param parallelism the maximum degree of parallelism this method should use
1105+
* @param executor the executor
1106+
* @param <T> the type of item
1107+
* @param <U> the type of the result
1108+
* @return a {@link CompletableFuture} containing a list of results collected from the individual body invocations
1109+
*/
1110+
@Nonnull
1111+
@SuppressWarnings("unchecked")
1112+
public static <T, U> CompletableFuture<List<U>> forEach(@Nonnull final Iterable<T> items,
1113+
@Nonnull final Function<T, CompletableFuture<U>> body,
1114+
final int parallelism,
1115+
@Nonnull final Executor executor) {
1116+
// this deque is only modified by once upon creation
1117+
final ArrayDeque<T> toBeProcessed = new ArrayDeque<>();
1118+
for (final T item : items) {
1119+
toBeProcessed.addLast(item);
1120+
}
1121+
1122+
final List<CompletableFuture<Void>> working = Lists.newArrayList();
1123+
final AtomicInteger indexAtomic = new AtomicInteger(0);
1124+
final Object[] resultArray = new Object[toBeProcessed.size()];
1125+
1126+
return whileTrue(() -> {
1127+
working.removeIf(CompletableFuture::isDone);
1128+
1129+
while (working.size() <= parallelism) {
1130+
final T currentItem = toBeProcessed.pollFirst();
1131+
if (currentItem == null) {
1132+
break;
1133+
}
1134+
1135+
final int index = indexAtomic.getAndIncrement();
1136+
working.add(body.apply(currentItem)
1137+
.thenAccept(result -> resultArray[index] = result));
1138+
}
1139+
1140+
if (working.isEmpty()) {
1141+
return AsyncUtil.READY_FALSE;
1142+
}
1143+
return whenAny(working).thenApply(ignored -> true);
1144+
}, executor).thenApply(ignored -> Arrays.asList((U[])resultArray));
1145+
}
1146+
10541147
/**
10551148
* A {@code Boolean} function that is always true.
10561149
* @param <T> the type of the (ignored) argument to the function

fdb-extensions/src/main/java/com/apple/foundationdb/async/rtree/StorageAdapter.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
* Storage adapter used for serialization and deserialization of nodes.
3737
*/
3838
interface StorageAdapter {
39-
4039
/**
4140
* Get the {@link RTree.Config} associated with this storage adapter.
4241
* @return the configuration used by this storage adapter

0 commit comments

Comments
 (0)