diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..57091e0
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,32 @@
+BasedOnStyle: LLVM
+
+# Indentation
+IndentWidth: 4
+TabWidth: 4
+
+# Braces
+BreakBeforeBraces: Attach
+AllowShortFunctionsOnASingleLine: InlineOnly
+
+ColumnLimit: 100
+
+# Pointer/reference alignment
+PointerAlignment: Left
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignAfterOpenBracket: BlockIndent
+AlwaysBreakTemplateDeclarations: Yes
+
+# Spaces
+SpaceBeforeParens: ControlStatements
+SpaceAfterCStyleCast: true
+SpacesInParentheses: false
+
+
+BinPackParameters: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AlwaysBreakAfterReturnType: None
+PenaltyReturnTypeOnItsOwnLine: 1024
+
+BinPackArguments: false
+AllowAllArgumentsOnNextLine: true
\ No newline at end of file
diff --git a/.clang-tidy b/.clang-tidy
new file mode 100644
index 0000000..86cf168
--- /dev/null
+++ b/.clang-tidy
@@ -0,0 +1,22 @@
+Checks: >
+ -*,
+ bugprone-*,
+ clang-analyzer-*,
+ cppcoreguidelines-virtual-class-destructor,
+ modernize-pass-by-value,
+ modernize-use-emplace,
+ modernize-use-nullptr,
+ modernize-use-override,
+ modernize-use-using,
+ performance-*,
+ readability-redundant-*,
+ -bugprone-easily-swappable-parameters,
+ -performance-avoid-endl
+
+WarningsAsErrors: ''
+
+CheckOptions:
+ - key: bugprone-narrowing-conversions.WarnOnEquivalentBitWidth
+ value: false
+
+HeaderFilterRegex: 'include/superkmeans/.*'
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..3b91e04
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,154 @@
+name: CI
+
+on:
+ push:
+ branches: [main]
+ paths-ignore:
+ - '**.md'
+ - 'LICENSE'
+ - '.gitignore'
+ pull_request:
+ branches: [main]
+ paths-ignore:
+ - '**.md'
+ - 'LICENSE'
+ - '.gitignore'
+
+jobs:
+ format-check:
+ runs-on: ubuntu-24.04
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.12"
+
+ - name: Install clang-format 18.1.8
+ run: pip install clang-format==18.1.8
+
+ - name: Check C++ formatting
+ run: |
+ clang-format --version
+ ./scripts/format_check.sh
+
+ tidy-check:
+ runs-on: ubuntu-24.04
+ env:
+ CC: clang-18
+ CXX: clang++-18
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ submodules: recursive
+
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y clang-18 clang-tidy-18 libomp-18-dev libopenblas-dev cmake
+ sudo ln -sf /usr/bin/clang-tidy-18 /usr/local/bin/clang-tidy
+
+ - name: Configure
+ run: cmake -B build -DPDX_COMPILE_TESTS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
+
+ - name: Build
+ run: cmake --build build -j$(nproc)
+
+ - name: Run clang-tidy
+ run: |
+ ln -s build/compile_commands.json compile_commands.json
+ ./scripts/tidy_check.sh
+
+ cpp-build-and-test:
+ runs-on: ubuntu-24.04
+ env:
+ CC: clang-18
+ CXX: clang++-18
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ submodules: recursive
+
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y clang-18 libomp-18-dev libopenblas-dev cmake
+
+ - name: Configure
+ run: cmake -B build -DPDX_COMPILE_TESTS=ON -DCMAKE_BUILD_TYPE=Release
+
+ - name: Build tests
+ run: cmake --build build -j$(nproc) --target tests
+
+ - name: Run tests
+ run: ctest --test-dir build --output-on-failure
+
+ python:
+ runs-on: ubuntu-24.04
+ env:
+ CC: clang-18
+ CXX: clang++-18
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ submodules: recursive
+
+ - name: Install system dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y clang-18 libomp-18-dev libopenblas-dev cmake
+
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.12"
+
+ - name: Install Python bindings
+ run: pip install .
+
+ - name: Verify import
+ run: python -c "import pdxearch; print('pdxearch imported successfully')"
+
+ sanitizers-asan-ubsan:
+ runs-on: ubuntu-24.04
+ env:
+ CC: clang-18
+ CXX: clang++-18
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ submodules: recursive
+
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y clang-18 libomp-18-dev libopenblas-dev cmake
+
+ - name: Configure with ASan + UBSan
+ run: |
+ cmake -B build_asan -DPDX_COMPILE_TESTS=ON \
+ -DCMAKE_BUILD_TYPE=Debug \
+ -DCMAKE_CXX_FLAGS="-fsanitize=address,undefined -fno-omit-frame-pointer" \
+ -DCMAKE_C_FLAGS="-fsanitize=address,undefined -fno-omit-frame-pointer"
+
+ - name: Build tests
+ run: cmake --build build_asan -j$(nproc) --target tests
+
+ - name: Run tests
+ run: ctest --test-dir build_asan --output-on-failure
+
+ ci-pass:
+ runs-on: ubuntu-latest
+ if: always()
+ needs:
+ - format-check
+ - tidy-check
+ - cpp-build-and-test
+ - python
+ - sanitizers-asan-ubsan
+ steps:
+ - name: Check all jobs passed
+ run: |
+ if [[ "${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
+ echo "One or more jobs failed or were cancelled"
+ exit 1
+ fi
diff --git a/.gitignore b/.gitignore
index 7235362..780116a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,6 +42,11 @@ venv
/cmake-build-debug/
/cmake-build-release/
/cmake-build*/
+/build_debug/
+/build_release/
+/build_relwithdebinfo/
+/tests/cmake_test_discovery_*.json
+compile_commands.json
/.idea/
/dummy/
/Testing/
@@ -62,10 +67,10 @@ pdxearch.egg-info
/benchmarks/core_indexes/faiss_l0/*
!/benchmarks/core_indexes/faiss_l0/*.json
-/benchmarks/datasets/adsampling_nary
/benchmarks/datasets/adsampling_pdx
/benchmarks/datasets/downloaded
-/benchmarks/datasets/nary
+/benchmarks/datasets/raw
+/benchmarks/datasets/faiss
/benchmarks/datasets/pdx
/benchmarks/datasets/purescan
/benchmarks/datasets/queries
@@ -91,25 +96,12 @@ cmake_install.cmake
/benchmarks/milvus/volumes/
/benchmarks/python_scripts/indexes
-/benchmarks/BenchmarkNaryIVFADSampling
-/benchmarks/BenchmarkNaryIVFADSamplingSIMD
-/benchmarks/BenchmarkPDXADSampling
-/benchmarks/BenchmarkIVF2ADSampling
-/benchmarks/FilteredBenchmarkPDXADSampling
-/benchmarks/FilteredBenchmarkU8IVF2ADSampling
-/benchmarks/BenchmarkASYM_U8PDXADSampling
-/benchmarks/BenchmarkU8PDXADSampling
-/benchmarks/BenchmarkLEP8PDXADSampling
-/benchmarks/BenchmarkPDXIVFBOND
-/benchmarks/BenchmarkPDXBOND
-/benchmarks/BenchmarkPDXLinearScan
-/benchmarks/BenchmarkU*
-/benchmarks/G4*
-/benchmarks/BenchmarkNaryIVFLinearScan
-/benchmarks/KernelPDXL1
-/benchmarks/KernelPDXL2
-/benchmarks/KernelPDXIP
-/benchmarks/KernelNaryL1
-/benchmarks/KernelNaryL2
-/benchmarks/KernelNaryIP
-/benchmarks/BenchmarkU8*
\ No newline at end of file
+/benchmarks/BenchmarkEndToEnd
+/benchmarks/BenchmarkSerialization
+/benchmarks/BenchmarkPDXIVF
+/benchmarks/BenchmarkFiltered
+/benchmarks/BenchmarkSpecialFilters
+
+# Test binaries (but keep the committed test data)
+*.bin
+!tests/test_data.bin
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
index 9b9ae47..4f1e28d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -8,3 +8,6 @@
[submodule "extern/findFFTW"]
path = extern/findFFTW
url = https://github.com/egpbos/findfftw.git
+[submodule "extern/SuperKMeans"]
+ path = extern/SuperKMeans
+ url = https://github.com/lkuffo/SuperKMeans
diff --git a/BENCHMARKING.md b/BENCHMARKING.md
index 5a66415..456849b 100644
--- a/BENCHMARKING.md
+++ b/BENCHMARKING.md
@@ -1,21 +1,49 @@
-# Benchmarking
+# Benchmarks
+
+We present single-threaded **benchmarks** against FAISS+AVX512 on an `r7iz.xlarge` (Intel Sapphire Rapids) instance.
+
+### Two-Level IVF (IVF2) 
+IVF2 tackles a bottleneck of IVF indexes: finding the nearest centroids. By clustering the original IVF centroids, we can use PDX to quickly scan them (thanks to pruning) without sacrificing recall. This achieves significant throughput improvements when paired with `8-bit` quantization. Within the codebase, we refer to this index as `PDXTree`.
+
+
+
+
+
+### Vanilla IVF
+Here, PDX, paired with the pruning algorithm ADSampling on `float32`, achieves significant speedups.
+
+
+
+
-We provide a master script that setups the entire benchmarking suite for you.
+
+### Exhaustive search + IVF
+An exhaustive search scans all the vectors in the collection. Having an IVF index with PDX can **EXTREMELY** accelerate this without sacrificing recall, thanks to the reliable pruning of ADSampling.
+
+
+
+
+
+The key observation here is that thanks to the underlying IVF index, the exhaustive search starts with the most promising clusters. A tight threshold is found early on, which enables the quick pruning of most candidates.
+
+### No pruning and no index
+Even without pruning, PDX distance kernels can be faster than SIMD ones in most CPU microarchitectures. For detailed information, check Figure 3 of [our publication](https://ir.cwi.nl/pub/35044/35044.pdf). You can also try it yourself in our playground [here](./benchmarks/kernels_playground).
+
+# Benchmarking
## Setting up Data
-To download all the datasets and generate all the indexes needed to run our benchmarking suite, you can use the script [/benchmarks/python_scripts/setup_data.py](/benchmarks/python_scripts/setup_data.py). For this, you need Python 3.11 or higher and install the dependencies in `/benchmarks/python_scripts/requirements.txt`.
+To download all the datasets and generate all the indexes needed to run our benchmarking suite, you can use the script [setup_data.py](/benchmarks/python_scripts/setup_data.py). For this, you need Python 3.11 or higher and install the dependencies in `/benchmarks/python_scripts/requirements.txt`.
-> [!CAUTION]
-> You will need roughly 300GB of disk for ALL the indexes of the datasets used in our paper.
+Run the script from the root folder with the script flags `DOWNLOAD` and `GENERATE_IVF` set to `True`. You do not need to generate the `ground_truth` for k <= 100 as it is already present.
-Run the script from the root folder with the script flags `DOWNLOAD` and `GENERATE_IVF` set to `True` and the values in the `ALGORITHMS` array uncommented. You do not need to generate the `ground_truth` for k <= 100 as it is already present.
+You can specify the datasets you wish to create indexes for on the `DATASETS_TO_USE` array in [setup_data.py](/benchmarks/python_scripts/setup_data.py).
-You can specify the datasets you wish to create indexes for on the `DATASETS_TO_USE` array in the master script.
```sh
pip install -r ./benchmarks/python_scripts/requirements.txt
python ./benchmarks/python_scripts/setup_data.py
```
+
The indexes will be created under the `/benchmarks/datasets/` directory.
### Manually downloading data
@@ -25,80 +53,56 @@ You can also:
Then, run the Master Script with the flag `DOWNLOAD = False`.
-You can specify the datasets you wish to create indexes for on the `DATASETS_TO_USE` array in the master script.
+You can specify the datasets you wish to create indexes for on the `DATASETS_TO_USE` array in [setup_data.py](/benchmarks/python_scripts/setup_data.py).
### Configuring the IVF indexes
-Configure the IVF indexes in [/benchmarks/python_scripts/setup_core_index.py](/benchmarks/python_scripts/setup_core_index.py). The benchmarks presented in our publication use `n_buckets = 2 * sqrt(n)` for the number of inverted lists (buckets) and `n_training_points = 50 * n_buckets`. This will create solid indexes fairly quickly.
+Configure the IVF indexes in [/benchmarks/python_scripts/setup_core_index.py](/benchmarks/python_scripts/setup_core_index.py).
## Running Benchmarks
Once you have downloaded and created the indexes, you can start benchmarking.
-### Requirements
-1. Clang++17 or higher.
-2. CMake 3.26 or higher.
-3. Set CXX variable. E.g., `export CXX="/usr/bin/clang++-18"`
+## Prerequisites
+
+### Clang, CMake, OpenMP and a BLAS implementation
+Check [INSTALL.md](./INSTALL.md).
### Building
We built our scripts with the proper `march` flags. Below are the flags we used for each microarchitecture:
```sh
-# GRAVITON4
-cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS_RELEASE="-O3 -mcpu=neoverse-v2"
-# GRAVITON3
-cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS_RELEASE="-O3 -mcpu=neoverse-v1"
-# Intel Sapphire Rapids (256 vectors are used if mprefer-vector-width is not specified)
-cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS_RELEASE="-O3 -march=sapphirerapids -mtune=sapphirerapids -mprefer-vector-width=512"
-# ZEN4
-cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS_RELEASE="-O3 -march=znver4 -mtune=znver4"
-# ZEN3
-cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS_RELEASE="-O3 -march=znver3 -mtune=znver3"
-
-make
+cmake . -DPDX_COMPILE_BENCHMARKS=ON
+make benchmarks
```
-On the [/benchmarks/CMakeLists.txt](/benchmarks/CMakeLists.txt) file, you can find which `.cpp` files map to which benchmark.
-
-
-## Complete benchmarking scripts list
-
-### IVF index searches
-
-- PDX IVF ADSampling: `/benchmarks/BenchmarkPDXADSampling`
-- PDX IVF ADSampling + SQ8: `/benchmarks/BenchmarkU8PDXADSampling`
-- PDX Two-Level IVF ADSampling: `/benchmarks/BenchmarkIVF2ADSampling`
-- PDX Two Level IVF ADSampling + SQ8: `/benchmarks/BenchmarkU8IVF2ADSampling`
-- PDX IVF BOND: `/benchmarks/BenchmarkPDXIVFBOND`
+## Benchmarking scripts list
+- Index Creation and Search: `/benchmarks/BenchmarkEndToEnd`
+- PDX IVF: `/benchmarks/BenchmarkPDXIVF`
- FAISS IVF: `/benchmarks/python_scripts/ivf_faiss.py`
-All of these programs have two optional parameters:
-- `` to specify the name of the dataset to use. If not given, it will try to use all the datasets set in [benchmark_utils.hpp](/include/utils/benchmark_utils.hpp) or [benchmark_utils.py](/benchmarks/python_scripts/benchmark_utils.py) in the Python scripts.
-- `` to specify the `nprobe` parameter on the IVF index, which controls the recall. If not given or `0`, it will use a series of parameters from 2 to 4096 set in the [benchmark_utils.hpp](/include/utils/benchmark_utils.hpp) or [benchmark_utils.py](/benchmarks/python_scripts/benchmark_utils.py) in the Python scripts.
+PDX programs have three parameters:
+- `<index_type>` to specify the type of PDX index to use. We support 4 index types. From least to most performant:
+ - `pdx_f32`: IVF index with float32 vectors
+ - `pdx_tree_f32`: Tree IVF index with float32 vectors
+ - `pdx_u8`: IVF index with 8-bit scalar quantization
+ - `pdx_tree_u8`: Tree IVF index with 8-bit scalar quantization
+- `<dataset_name>` to specify the identifier of the dataset to use. If not given, it will try to use all the datasets set in [benchmark_utils.hpp](/include/utils/benchmark_utils.hpp) or [benchmark_utils.py](/benchmarks/python_scripts/benchmark_utils.py) in the Python scripts.
+- `<ivf_nprobe>` to specify the `nprobe` parameter on the IVF index, which controls the recall. If not given or `0`, it will use a series of parameters from 2 to 4096 set in the [benchmark_utils.hpp](/include/utils/benchmark_utils.hpp) or [benchmark_utils.py](/benchmarks/python_scripts/benchmark_utils.py) in the Python scripts.
-PDX IVF BOND has an additional third parameter:
-- ``: An integer value. On Intel SPR, we use distance-to-means (`1`). For the other microarchitectures, we use dimension-zones (`5`). Refer to Figure 5 of [our publication](https://ir.cwi.nl/pub/35044/35044.pdf).
+
-> [!IMPORTANT]
-> Recall that the IVF indexes must be created beforehand by the `setup_data.py` script.
-
-### Exact Search
-- PDX BOND: ```/benchmarks/BenchmarkPDXBOND```
-- USearch: ```python /benchmarks/python_scripts/exact_usearch.py```
-- SKLearn: ```python /benchmarks/python_scripts/exact_sklearn.py```
-- FAISS: ```python /benchmarks/python_scripts/exact_faiss.py```
+### List of Datasets
-All of these programs have one optional parameter:
-- `` to specify the name of the dataset to use. If not given, it will try to use all the datasets set in [benchmark_utils.hpp](/include/utils/benchmark_utils.hpp) or [benchmark_utils.py](/benchmarks/python_scripts/benchmark_utils.py) in the Python scripts.
+| Identifier | Dataset HDF5 Name in Google Drive | Embeddings | Model | # Vectors | Dim. | Size (GB) ↑ |
+| ------------ | ----------------------------------- | ------------- | ------------ | --------- | ---- | ----------- |
+| `arxiv` | `instructorxl-arxiv-768` | Text | InstructorXL | 2,253,000 | 768 | 6.92 |
+| `openai` | `openai-1536-angular` | Text | OpenAI | 999,000 | 1536 | 6.14 |
+| `wiki` | `simplewiki-openai-3072-normalized` | Text | OpenAI | 260,372 | 3072 | 3.20 |
+| `mxbai` | `agnews-mxbai-1024-euclidean` | Text | MXBAI | 769,382 | 1024 | 3.15 |
-PDX BOND has an additional second parameter:
-- ``: An integer value. On exact-search, we always use distance-to-means (`1`). Refer to Figure 5 of [our publication](https://ir.cwi.nl/pub/35044/35044.pdf).
+
-**Notes**: Usearch, SKLearn, and FAISS scripts expect the original `.hdf5` files under the `/downloaded` directory. Furthermore, they require their respective Python packages (`pip install -r ./benchmarks/python_scripts/requirements.txt`).
+> [!IMPORTANT]
+> Recall that for `BenchmarkPDXIVF` and `ivf_faiss.py` the indexes must be created beforehand by the `setup_data.py` script.
## Output
Output is written in a .csv format to the `/benchmarks/results/DEFAULT` directory. Each file contains entries detailing the experiment parameters, such as the dataset, algorithm, kNN, number of queries (`n_queries`), `ivf_nprobe`, and, more importantly, the average runtime per query in ms in the `avg` column. Each benchmarking script will create a file with a different name.
-
-## Kernels Experiment
-Visit our playground for PDX vs SIMD kernels [here](./benchmarks/bench_kernels)
-
-## SIGMOD'25
-Check the `sigmod` branch.
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c8d1d3b..22f2eba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,13 +1,55 @@
cmake_minimum_required(VERSION 3.26)
-set(CMAKE_CXX_STANDARD 17)
project(PDX)
+# Default to Release build if not specified
+if(NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type (default: Release)" FORCE)
+endif()
+message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native")
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/extern/findFFTW")
+
add_compile_options(-fPIC)
include(FetchContent)
include(CheckCXXCompilerFlag)
include(CMakePrintHelpers)
include(CTest)
-# include(ExternalProject)
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/extern/SuperKMeans/include)
+
+find_package(OpenMP REQUIRED)
+
+list(PREPEND CMAKE_PREFIX_PATH /usr/local)
+
+set(MKL_INTERFACE_FULL "intel_lp64")
+find_package(MKL CONFIG QUIET)
+if (MKL_FOUND)
+ message(STATUS "MKL library found")
+ message(STATUS "MKL targets: ${MKL_IMPORTED_TARGETS}")
+ get_target_property(mkl_includes MKL::MKL INTERFACE_INCLUDE_DIRECTORIES)
+ message(STATUS "MKL includes: ${mkl_includes}")
+ add_definitions(-DEIGEN_USE_MKL_ALL)
+ set(MKL_COMMON_LIBS MKL::MKL OpenMP::OpenMP_CXX m dl)
+ set(BLAS_LINK_LIBRARIES "")
+else()
+ set(MKL_COMMON_LIBS "")
+ message(STATUS "MKL not found. Trying to find a BLAS implementation")
+
+ # On macOS, prefer Apple's Accelerate framework over other BLAS implementations
+ if(APPLE)
+ set(BLA_VENDOR Apple)
+ message(STATUS "macOS detected: prioritizing Apple Accelerate framework")
+ endif()
+
+ find_package(BLAS REQUIRED)
+ message(STATUS "BLAS library found: ${BLAS_LIBRARIES}")
+ add_definitions(-DEIGEN_USE_BLAS)
+ set(BLAS_LINK_LIBRARIES ${BLAS_LIBRARIES} OpenMP::OpenMP_CXX)
+endif()
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "extern/findFFTW")
find_package(FFTW QUIET)
@@ -16,42 +58,69 @@ if (FFTW_FLOAT_LIB_FOUND)
add_definitions(-DHAS_FFTW)
include_directories(${FFTW_INCLUDE_DIRS})
else()
- message(WARNING "FFTW (+float capabilities) not found, we recommend you install it")
-# TODO: Perhaps downloading?
-# set(FFTW_INSTALL_DIR ${CMAKE_BINARY_DIR}/fftw-install)
-# ExternalProject_Add(fftw
-# URL http://www.fftw.org/fftw-3.3.10.tar.gz
-# PREFIX fftw-src
-# CONFIGURE_COMMAND ./configure --prefix=${FFTW_INSTALL_DIR} --enable-float
-# BUILD_COMMAND make -j4
-# INSTALL_COMMAND make install
-# )
-# set(FFTW_INCLUDE_DIRS ${FFTW_INSTALL_DIR}/include)
-# set(FFTW_FLOAT_LIB ${FFTW_INSTALL_DIR}/lib/libfftw3f.so)
-# set(FFTW_FOUND TRUE)
-# add_definitions(-DHAS_FFTW)
+ message(STATUS "FFTW (+float capabilities) not found, we recommend you install them for increased performance")
endif()
-# Installing FFTW https://www.fftw.org/fftw3_doc/Installation-on-Unix.html ---------------------------------------------
-# wget https://www.fftw.org/fftw-3.3.10.tar.gz
-# tar -xvzf fftw-3.3.10.tar.gz
-# cd fftw-3.3.10
-# ./configure --enable-float --enable-shared # --enabled-shared is required for ctypes.util.find_library("fftw3f")
-# sudo make
-# sudo make install
-# ldconfig
-
-# CMAKE_SOURCE_DIR: ----------------------------------------------------------------------------------------------------
add_compile_definitions(CMAKE_SOURCE_DIR="${CMAKE_SOURCE_DIR}")
-# Gtest: ---------------------------------------------------------------------------------------------------------------
-#include(FetchContent)
-#FetchContent_Declare(
-# googletest
-# URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
-#)
-#set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
-#FetchContent_MakeAvailable(googletest)
-
-include_directories(include extern)
-add_subdirectory(benchmarks)
+include_directories(include extern/Eigen)
+
+# Benchmarks: disabled by default, enable with -DPDX_COMPILE_BENCHMARKS=ON
+set(PDX_COMPILE_BENCHMARKS OFF CACHE BOOL "Whether to compile benchmarks")
+if(PDX_COMPILE_BENCHMARKS)
+ message(STATUS "Benchmarks enabled")
+ add_subdirectory(benchmarks)
+endif()
+
+# Tests: disabled by default, enable with -DPDX_COMPILE_TESTS=ON
+set(PDX_COMPILE_TESTS OFF CACHE BOOL "Whether to compile tests")
+if(PDX_COMPILE_TESTS)
+ message(STATUS "Tests enabled")
+ add_subdirectory(tests)
+endif()
+
+# Python bindings: only build if pybind11 is available
+find_package(Python COMPONENTS Interpreter Development.Module QUIET)
+set(PYBIND11_FINDPYTHON ON)
+find_package(pybind11 CONFIG QUIET)
+
+if(Python_FOUND AND pybind11_FOUND)
+ message(STATUS "Python bindings enabled")
+ message(STATUS "Python executable: ${Python_EXECUTABLE}")
+ message(STATUS "pybind11 found: ${pybind11_VERSION}")
+
+ pybind11_add_module(compiled
+ python/lib.cpp
+ )
+
+ target_include_directories(compiled PRIVATE
+ ${CMAKE_CURRENT_SOURCE_DIR}/include
+ ${CMAKE_CURRENT_SOURCE_DIR}/extern/Eigen
+ ${CMAKE_CURRENT_SOURCE_DIR}/extern/SuperKMeans/include
+ )
+
+ if(MKL_FOUND)
+ target_link_libraries(compiled PRIVATE ${MKL_COMMON_LIBS})
+ else()
+ target_link_libraries(compiled PRIVATE ${BLAS_LINK_LIBRARIES})
+ endif()
+
+ if(FFTW_FOUND)
+ target_link_libraries(compiled PRIVATE ${FFTW_FLOAT_LIB} ${FFTW_FLOAT_OPENMP_LIB})
+ endif()
+
+ target_compile_options(compiled PRIVATE
+ -Wall
+ -Wno-unknown-pragmas
+        $<$<CONFIG:Release>:-O3 -march=native>
+ )
+
+ target_compile_features(compiled PRIVATE cxx_std_17)
+
+ install(TARGETS compiled
+ LIBRARY DESTINATION pdxearch
+ COMPONENT python
+ )
+else()
+ message(STATUS "Python bindings disabled (pybind11 not found)")
+endif()
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..7a2922d
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,66 @@
+# Contributing
+
+We are actively developing PDX and accepting contributions! Any kind of PR is welcome.
+
+These are our current priorities:
+
+**Features**:
+- Inserts and Updates (wip).
+- Out-of-core execution (disk-based setting).
+- Implement multi-threading capabilities.
+- Add PDX to the [VIBE benchmark](https://vector-index-bench.github.io/).
+- Create documentation.
+
+**Improvements**:
+- Regression tests on CI.
+
+
+## Getting Started
+
+1. **Fork the repository** on GitHub and create a feature branch:
+```bash
+git checkout -b my-feature
+```
+
+2. **Make your changes.**
+3. **Run the test suite** locally before submitting your PR.
+4. **Open a Pull Request (PR)** against the `main` branch.
+
+> [!IMPORTANT]
+> Let us know in advance if you plan to implement a big feature!
+
+## Testing
+
+All PRs must pass the full test suite in CI. Before submitting a PR, you should run tests locally:
+
+```bash
+# C++ tests
+cmake . -DPDX_COMPILE_TESTS=ON
+make -j$(nproc) tests
+ctest .
+```
+
+Tests are also prone to bugs. If that is the case, please open an Issue.
+
+## Submitting a PR
+
+* Open your PR against the **`main` branch**.
+* Make sure your branch is **rebased on top of `main`** before submission.
+* Verify that **CI passes**.
+* Keep PRs focused — small, logical changes are easier to review and merge.
+
+## Coding Style
+* Function, Class, and Struct names: `PascalCase`
+* Variables and Class/Struct member names: `snake_case`
+* Constants and magic variables: `UPPER_SNAKE_CASE`
+* Avoid `new` and `delete`
+* There is a `.clang-format` in the project. Make sure to adhere to it. We have provided scripts to check and format the files within the project:
+```bash
+pip install clang-format==18.1.8
+./scripts/format_check.sh # Checks the formatting
+./scripts/format.sh # Fix the formatting
+```
+
+## Communication
+
+* Use GitHub Issues for bug reports and feature requests.
diff --git a/INSTALL.md b/INSTALL.md
new file mode 100644
index 0000000..64bc776
--- /dev/null
+++ b/INSTALL.md
@@ -0,0 +1,142 @@
+# Installation
+
+### PDX needs:
+- Clang 17, CMake 3.26
+- OpenMP
+- A BLAS implementation
+- Python 3 (only for Python bindings)
+
+Once you have these requirements, you can install the Python Bindings
+
+
+ Installing Python Bindings
+
+```sh
+git clone https://github.com/cwida/PDX
+cd PDX
+git submodule update --init
+
+# Create a venv if needed
+python -m venv ./venv
+source venv/bin/activate
+
+# Set proper clang compiler if needed
+export CXX="/usr/bin/clang++-18"
+
+pip install .
+```
+
+
+
+## Step by Step
+* [Installing Clang](#installing-clang)
+* [Installing CMake](#installing-cmake)
+* [Installing OpenMP](#installing-openmp)
+* [Installing BLAS](#installing-blas)
+* [Installing FFTW](#installing-fftw) [optional]
+* [Troubleshooting](#troubleshooting)
+
+## Installing Clang
+We recommend LLVM
+### Linux
+```sh
+sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" -- 18
+```
+
+### MacOS
+```sh
+brew install llvm
+```
+
+## Installing CMake
+### Linux
+```sh
+sudo apt update
+sudo apt install make
+sudo apt install cmake
+```
+
+### MacOS
+```sh
+brew install cmake
+```
+
+## Installing OpenMP
+
+### Linux
+Most distributions come with OpenMP, or you can install it with:
+```sh
+sudo apt-get install libomp-dev
+```
+
+### MacOS
+```sh
+brew install libomp
+```
+
+
+## Installing BLAS
+
+BLAS is extremely important to achieve high performance. We recommend [OpenBLAS](https://github.com/OpenMathLib/OpenBLAS).
+
+### Linux
+Most distributions come with [OpenBLAS](https://github.com/OpenMathLib/OpenBLAS), or you may have already installed OpenBLAS via `apt`. **THIS IS SLOW**. We recommend installing OpenBLAS from source with the commands below.
+
+```sh
+git clone https://github.com/OpenMathLib/OpenBLAS.git
+cd OpenBLAS
+make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=1 NUM_THREADS=128
+make -j$(nproc) PREFIX=/usr/local install
+ldconfig
+```
+
+### MacOS
+**Silicon Chips (M1 to M5)**: You don't need to do anything special. We automatically detect [Apple Accelerate](https://developer.apple.com/documentation/accelerate) that uses the [AMX](https://github.com/corsix/amx) unit.
+
+**Intel Chips (older Macs)**: Install OpenBLAS as detailed above.
+
+## Installing FFTW
+[FFTW](https://www.fftw.org/fftw3_doc/Installation-on-Unix.html) will give you better performance in very high-dimensional datasets (d > 1024).
+
+```sh
+wget https://www.fftw.org/fftw-3.3.10.tar.gz
+tar -xvzf fftw-3.3.10.tar.gz
+cd fftw-3.3.10
+./configure --enable-float --enable-shared --enable-openmp
+sudo make -j$(nproc)
+sudo make install
+ldconfig
+```
+
+## Troubleshooting
+
+### Python bindings installation fails
+
+Error:
+```
+Could NOT find Python (missing: Development.Module)
+ Reason given by package:
+ Development: Cannot find the directory "/usr/include/python3.12"
+```
+
+Solution: Install `python-dev` package:
+
+```sh
+sudo apt install python3-dev
+```
+
+### I get a bunch of `warnings` when compiling PDX
+
+If you see a lot of warnings like this one:
+```warning: ignoring ‘#pragma clang loop’```
+
+You are using GCC instead of Clang. If you installed Clang, you can set the correct compiler by doing the following:
+```sh
+export CXX="/usr/bin/clang++-18" # Linux
+
+export CXX="/opt/homebrew/opt/llvm/bin/clang++" # MacOS
+```
+
+### Does PDX use SIMD?
+Yes. We have optimizations for AVX512, AVX2, and NEON. You don't need to do anything special to activate these. If your machine doesn't have any of these, we rely on scalar code.
+
diff --git a/README.md b/README.md
index d076894..cbfd6ec 100644
--- a/README.md
+++ b/README.md
@@ -1,133 +1,90 @@
- PDX: A Transposed Data Layout for Similarity Search
+ PDX: A Library for Fast Vector Search and Indexing
-
-[PDX](https://ir.cwi.nl/pub/35044/35044.pdf) is a data layout that **transposes** vectors in a column-major order. This layout unleashes the true potential of dimension pruning, accelerating vanilla IVF indexes by factors:
+
+ Index millions of vectors in seconds. Search them in milliseconds.
+
-
+
-PDX makes an IVF index, competitive with HNSW:
-
-
-
+## Why PDX?
-### PDX benefits:
-
-- ⚡ Up to [**10x faster**](#two-level-ivf-ivf2-) **IVF searches** than FAISS+AVX512.
-- ⚡ Up to [**30x faster**](#exhaustive-search--ivf) **exhaustive search**.
+- ⚡ [**30x faster index building**](https://www.lkuffo.com/superkmeans/) thanks to [SuperKMeans](https://github.com/lkuffo/SuperKMeans).
+- ⚡ [**Sub-millisecond similarity search**](https://www.lkuffo.com/sub-milisecond-similarity-search-with-pdx/), up to [**10x faster**](./BENCHMARKING.md#two-level-ivf-ivf2-) than FAISS IVF.
+- ⚡ Up to [**30x faster**](./BENCHMARKING.md#exhaustive-search--ivf) exhaustive search.
- 🔍 Efficient [**filtered search**](https://github.com/cwida/PDX/issues/7).
+- Query latency competitive with HNSW, with the ease of use of IVF.
-
-
-## Contents
-- [Pruning in a nutshell](#pruning-in-a-nutshell)
-- [Try PDX](#try-pdx)
-- [Use cases (comparison with FAISS)](#use-cases-and-benchmarks)
-- [The data layout](#the-data-layout)
-- [Roadmap](#roadmap)
+## Our secret sauce
-## Pruning in a nutshell
+[PDX](https://ir.cwi.nl/pub/35044/35044.pdf) is a data layout that **transposes** vectors in a column-major order. This layout unleashes the true potential of dimension pruning.
-Pruning means avoiding checking *all* the dimensions of a vector to determine if it is a neighbour of a query. The PDX layout unleashes the true potential of these algorithms (e.g., [ADSampling](https://github.com/gaoj0017/ADSampling/)), accelerating vanilla IVF indexes by factors.
+Pruning means avoiding checking *all* the dimensions of a vector to determine if it is a neighbour of a query, accelerating index construction and similarity search by factors.
-Pruning is especially effective for large embeddings (`d > 512`) and when targeting high recalls (`> 0.90`) or nearly exact results.
+## Use Cases and Benchmarking
+Check [./BENCHMARKING.md](./BENCHMARKING.md).
-[Down below](#use-cases-and-benchmarks), you will find **benchmarks** against FAISS.
+## Usage
+```py
+from pdxearch import IndexPDXIVFTreeSQ8
+data = ... # Numpy 2D matrix
+query = ... # Numpy 1D array
+d = 1024
+knn = 20
-## Try PDX
-Try PDX with your data using our Python bindings and [examples](/examples). We have implemented PDX on Flat (`float32`) and Quantized (`8-bit`) **IVF indexes** and **exhaustive search** settings.
-### Prerequisites
-- PDX is available for x86_64 (with AVX512), ARM, and Apple silicon
-- Python 3.11 or higher
-- [FAISS](https://github.com/facebookresearch/faiss/blob/main/INSTALL.md) with Python Bindings
-- Clang++17 or higher
-- CMake 3.26 or higher
-
-### Installation Steps
-1. Clone and init submodules
-```sh
-git clone https://github.com/cwida/PDX
-git submodule init
-git submodule update
-```
+index = IndexPDXIVFTreeSQ8(num_dimensions=d)
+index.build(data)
-2. *[Optional]* Install [FFTW](https://www.fftw.org/fftw3_doc/Installation-on-Unix.html) (for higher throughput)
-```sh
-wget https://www.fftw.org/fftw-3.3.10.tar.gz
-tar -xvzf fftw-3.3.10.tar.gz
-cd fftw-3.3.10
-./configure --enable-float --enable-shared
-sudo make
-sudo make install
-ldconfig
-```
+ids, dists = index.search(query, knn)
-3. Install Python dependencies and the bindings.
-```sh
-export CXX="/usr/bin/clang++-18" # Set proper CXX first
-pip install -r requirements.txt
-python setup.py clean --all
-python -m pip install .
```
-4. Run the examples under `/examples`
-```sh
-# Creates an IVF index with FAISS on random data
-# Then, it compares the search performance of PDXearch and FAISS
-python ./examples/pdx_simple.py
-```
-For more details on the available examples and how to use your own data, refer to [/examples/README.md](./examples/README.md).
-
-> [!NOTE]
-> We heavily rely on [FAISS](https://github.com/facebookresearch/faiss/blob/main/INSTALL.md) to create the underlying IVF indexes. To quickly install it you can do: `pip install faiss-cpu`
-
-## Use Cases and Benchmarks
-We present single-threaded **benchmarks** against FAISS+AVX512 on an `r7iz.xlarge` (Intel Sapphire Rapids) instance.
-### Two-Level IVF (IVF2) 
-IVF2 tackles a bottleneck of IVF indexes: finding the nearest centroids. By clustering the original IVF centroids, we can use PDX to quickly scan them (thanks to pruning) without sacrificing recall. This achieves significant throughput improvements when paired with `8-bit` quantization.
+`IndexPDXIVFTreeSQ8` is our fastest index that will give you the best performance. It is a two-level IVF index with 8-bit quantization.
-
-
-
-
-### Vanilla IVF
-Here, PDX, paired with the pruning algorithm ADSampling on `float32`, achieves significant speedups.
+Check our [examples](./examples/) for fully working examples in Python and our [benchmarks](./benchmarks) for fully working examples in C++. We support Flat (`float32`) and Quantized (`8-bit`) indexes, as well as the most common distance metrics.
-
-
-
+## Installation
+We provide Python bindings for ease of use. Soon, we will be available on PyPI.
+### Prerequisites
+- Clang 17 or higher, CMake 3.26 or higher
+- OpenMP
+- A BLAS implementation
+- Python 3 (only for Python bindings)
-### Exhaustive search + IVF
-An exhaustive search scans all the vectors in the collection. Having an IVF index with PDX can **EXTREMELY** accelerate this without sacrificing recall, thanks to the reliable pruning of ADSampling.
+### Installation Steps
+```sh
+git clone --recurse-submodules https://github.com/cwida/PDX
+cd PDX
-
-
-
+pip install .
-The key observation here is that thanks to the underlying IVF index, the exhaustive search starts with the most promising clusters. A tight threshold is found early on, which enables the quick pruning of most candidates.
+# Run the examples under `/examples`
+# pdx_simple.py creates an IVF index with FAISS on random data
+# Then, it compares the search performance of PDX and FAISS
+python ./examples/pdx_simple.py
+```
-### Exhaustive search without an index
-By creating random clusters with the PDX layout, you can still accelerate exhaustive search without an index. Unlike ADSampling, with BOND (our pruning algorithm), you can use the raw vectors. Gains vary depending on the distribution of the data.
+For a more comprehensive installation and compilation guide, check [INSTALL.md](./INSTALL.md).
-
-
-
+## Getting the Best Performance
+Check [INSTALL.md](./INSTALL.md).
-### No pruning and no index
-Even without pruning, PDX distance kernels can be faster than SIMD ones in most CPU microarchitectures. For detailed information, check Figure 3 of [our publication](https://ir.cwi.nl/pub/35044/35044.pdf). You can also try it yourself in our playground [here](./benchmarks/bench_kernels).
+## Roadmap
+We are actively developing SuperKMeans and accepting contributions! Check [CONTRIBUTING.md](./CONTRIBUTING.md).
## The Data Layout
-PDX is a transposed layout (a.k.a. columnar, or decomposed layout), which means that the same dimensions of different vectors are stored sequentially. This decomposition occurs within a block (e.g., a cluster in an IVF index).
+PDX is a transposed layout (a.k.a. columnar, or decomposed layout), meaning that the dimensions of different vectors are stored sequentially. This decomposition occurs within a block (e.g., a cluster in an IVF index).
We have evolved our layout from the one presented in our publication to reduce random access, and adapted it to work with `8-bit` and (in the future) `1-bit` vectors.
@@ -140,27 +97,15 @@ The following image shows this layout. Storage is sequential from left to right,
### `8 bits`
-Smaller data types are not friendly to PDX, as we must accumulate distances on wider types, resulting in asymmetry. We can work around this by changing the PDX layout. For `8 bits`, the vertical block is decomposed every 4 dimensions. This allows us to use dot product instructions (`VPDPBUSD` in [x86](https://www.officedaytime.com/simd512e/simdimg/si.php?f=vpdpbusd) and `UDOT/SDOT` in [NEON](https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions-)) to calculate L2 or IP kernels while still benefiting from PDX. The horizontal block remains decomposed every 64 dimensions.
+Smaller data types are not friendly to PDX, as we must accumulate distances on wider types, resulting in asymmetry. We can work around this by changing the PDX layout. For `8 bits`, the vertical block is decomposed every 4 dimensions. This allows us to use dot-product instructions (`VPDPBUSD` on [x86](https://www.officedaytime.com/simd512e/simdimg/si.php?f=vpdpbusd) and `UDOT/SDOT` on [NEON](https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions-)) to calculate L2 or IP kernels while still benefiting from PDX. The horizontal block remains decomposed every 64 dimensions.
-### `binary`
-For Hamming/Jaccard kernels, we use a layout decomposed every 8 dimensions (naturally grouped into bytes). The population count accumulation can be done in `bytes`. If d > 256, we flush the popcounts into a wider type every 32 words (corresponding to 256 dimensions). This has not been implemented in this repository yet, but you can find some promising benchmarks [here](https://github.com/lkuffo/binary-index).
-
-## Roadmap
-- Out-of-core execution (disk-based setting).
-- Add unit tests.
-- Implement multi-threading capabilities.
-- Add PDX to the [VIBE benchmark](https://vector-index-bench.github.io/).
-- Adaptive quantization on 8-bit and 4-bit.
-- Create a documentation.
-> [!IMPORTANT]
-> PDX is an ongoing research project. In its current state, it is not production-ready code.
+
-## Benchmarking
-To run our benchmark suite in C++, refer to [BENCHMARKING.md](./BENCHMARKING.md).
## Citation
If you use PDX for your research, consider citing us:
@@ -177,6 +122,3 @@ If you use PDX for your research, consider citing us:
publisher={ACM New York, NY, USA}
}
```
-
-## SIGMOD
-The code used for the experiments presented at SIGMOD'25 can be found in the `sigmod` branch.
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index 10506d2..fa95195 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -1,22 +1,29 @@
-# ADSampling
-add_executable(BenchmarkPDXADSampling bench_adsampling/pdx_ivf_adsampling.cpp)
-add_executable(BenchmarkIVF2ADSampling bench_adsampling/pdx_ivf2_adsampling.cpp)
-add_executable(BenchmarkU8PDXADSampling bench_adsampling/pdx_ivf_adsampling_u8.cpp)
-add_executable(BenchmarkU8IVF2ADSampling bench_adsampling/pdx_ivf2_adsampling_u8.cpp)
-add_executable(FilteredBenchmarkU8IVF2ADSampling bench_adsampling/pdx_ivf2_adsampling_u8_filtered.cpp)
-add_executable(FilteredBenchmarkPDXADSampling bench_adsampling/pdx_ivf_adsampling_filtered.cpp)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-# BOND
-add_executable(BenchmarkPDXIVFBOND ./bench_bond/pdx_bond_ivf.cpp)
-add_executable(BenchmarkPDXBOND ./bench_bond/pdx_bond.cpp)
+set(BENCH_COMMON_LIBS ${MKL_COMMON_LIBS} ${BLAS_LINK_LIBRARIES})
if (FFTW_FOUND)
- target_link_libraries(BenchmarkPDXADSampling ${FFTW_FLOAT_LIB})
- target_link_libraries(FilteredBenchmarkPDXADSampling ${FFTW_FLOAT_LIB})
- target_link_libraries(BenchmarkIVF2ADSampling ${FFTW_FLOAT_LIB})
- target_link_libraries(BenchmarkU8PDXADSampling ${FFTW_FLOAT_LIB})
- target_link_libraries(BenchmarkU8IVF2ADSampling ${FFTW_FLOAT_LIB})
- target_link_libraries(FilteredBenchmarkU8IVF2ADSampling ${FFTW_FLOAT_LIB})
- target_link_libraries(BenchmarkPDXIVFBOND ${FFTW_FLOAT_LIB})
- target_link_libraries(BenchmarkPDXBOND ${FFTW_FLOAT_LIB})
+ message(STATUS "Linking FFTW: ${FFTW_FLOAT_LIB} ${FFTW_FLOAT_OPENMP_LIB}")
+ list(APPEND BENCH_COMMON_LIBS ${FFTW_FLOAT_LIB} ${FFTW_FLOAT_OPENMP_LIB})
endif()
+
+add_executable(BenchmarkPDXIVF pdx_ivf.cpp)
+add_executable(BenchmarkEndToEnd pdx_end_to_end.cpp)
+add_executable(BenchmarkSerialization pdx_serialization.cpp)
+add_executable(BenchmarkFiltered pdx_filtered.cpp)
+add_executable(BenchmarkSpecialFilters pdx_special_filtered.cpp)
+
+target_link_libraries(BenchmarkPDXIVF ${BENCH_COMMON_LIBS})
+target_link_libraries(BenchmarkEndToEnd ${BENCH_COMMON_LIBS})
+target_link_libraries(BenchmarkSerialization ${BENCH_COMMON_LIBS})
+target_link_libraries(BenchmarkFiltered ${BENCH_COMMON_LIBS})
+target_link_libraries(BenchmarkSpecialFilters ${BENCH_COMMON_LIBS})
+
+add_custom_target(benchmarks
+ DEPENDS
+ BenchmarkPDXIVF
+ BenchmarkEndToEnd
+ BenchmarkSerialization
+ BenchmarkFiltered
+ BenchmarkSpecialFilters
+)
diff --git a/benchmarks/bench_adsampling/pdx_ivf2_adsampling.cpp b/benchmarks/bench_adsampling/pdx_ivf2_adsampling.cpp
deleted file mode 100644
index da05572..0000000
--- a/benchmarks/bench_adsampling/pdx_ivf2_adsampling.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-#ifndef BENCHMARK_TIME
-#define BENCHMARK_TIME = true
-#endif
-
-#ifndef PDX_USE_EXPLICIT_SIMD
-#define PDX_USE_EXPLICIT_SIMD = true
-#endif
-
-#include
-#include
-#include "utils/file_reader.hpp"
-#include "index_base/pdx_ivf2.hpp"
-#include "pruners/adsampling.hpp"
-#include "pdxearch.hpp"
-#include "utils/benchmark_utils.hpp"
-
-int main(int argc, char *argv[]) {
- std::string arg_dataset;
- size_t arg_ivf_nprobe = 0;
- if (argc > 1){
- arg_dataset = argv[1];
- }
- if (argc > 2){
- arg_ivf_nprobe = atoi(argv[2]);
- }
- std::cout << "==> PDX IVF ADSampling\n";
-
- std::string ALGORITHM = "adsampling";
- const bool VERIFY_RESULTS = BenchmarkUtils::VERIFY_RESULTS;
-
- uint8_t KNN = BenchmarkUtils::KNN;
- float EPSILON0 = BenchmarkUtils::EPSILON0;
- size_t NUM_QUERIES;
- size_t NUM_MEASURE_RUNS = BenchmarkUtils::NUM_MEASURE_RUNS;
-
- PDX::DimensionsOrder DIMENSION_ORDER = PDX::SEQUENTIAL;
-
- std::string RESULTS_PATH;
- RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "IVF2_PDX_ADSAMPLING.csv";
-
-
- for (const auto & dataset : BenchmarkUtils::DATASETS) {
- if (arg_dataset.size() > 0 && arg_dataset != dataset){
- continue;
- }
- PDX::IndexPDXIVF2 pdx_data = PDX::IndexPDXIVF2();
- pdx_data.Restore(BenchmarkUtils::PDX_ADSAMPLING_DATA + dataset + "-ivf2");
-
- std::unique_ptr _matrix_ptr = MmapFile(BenchmarkUtils::NARY_ADSAMPLING_DATA + dataset + "-ivf2-matrix");
- auto *_matrix = reinterpret_cast(_matrix_ptr.get());
-
- std::unique_ptr query_ptr = MmapFile(BenchmarkUtils::QUERIES_DATA + dataset);
- auto *query = reinterpret_cast(query_ptr.get());
-
- NUM_QUERIES = 1000;
- std::unique_ptr ground_truth = MmapFile(BenchmarkUtils::GROUND_TRUTH_DATA + dataset + "_100_norm");
- auto *int_ground_truth = reinterpret_cast(ground_truth.get());
- query += 1; // skip number of embeddings
-
- PDX::ADSamplingPruner pruner = PDX::ADSamplingPruner(pdx_data.num_dimensions, EPSILON0, _matrix);
- PDX::PDXearch searcher = PDX::PDXearch(pdx_data, pruner, 1, DIMENSION_ORDER);
-
- std::vector nprobes_to_use;
- if (arg_ivf_nprobe > 0) {
- nprobes_to_use = {arg_ivf_nprobe};
- } else {
- nprobes_to_use.assign(std::begin(BenchmarkUtils::IVF_PROBES), std::end(BenchmarkUtils::IVF_PROBES));
- }
-
- for (size_t ivf_nprobe : nprobes_to_use) {
- if (pdx_data.num_clusters < ivf_nprobe){
- continue;
- }
- if (arg_ivf_nprobe > 0 && ivf_nprobe != arg_ivf_nprobe){
- continue;
- }
- std::vector runtimes;
- runtimes.resize(NUM_MEASURE_RUNS * NUM_QUERIES);
- searcher.SetNProbe(ivf_nprobe);
-
- float recalls = 0;
- if (VERIFY_RESULTS) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- auto result = searcher.Search(query + l * pdx_data.num_dimensions, KNN);
- BenchmarkUtils::VerifyResult(recalls, result, KNN, int_ground_truth, l);
- }
- }
- for (size_t j = 0; j < NUM_MEASURE_RUNS; ++j) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- searcher.Search(query + l * pdx_data.num_dimensions, KNN);
- runtimes[j + l * NUM_MEASURE_RUNS] = {
- searcher.end_to_end_clock.accum_time
- };
- }
- }
- float real_selectivity = 1 - BenchmarkUtils::SELECTIVITY_THRESHOLD;
- BenchmarkMetadata results_metadata = {
- dataset,
- ALGORITHM,
- NUM_MEASURE_RUNS,
- NUM_QUERIES,
- ivf_nprobe,
- KNN,
- recalls,
- real_selectivity
- };
- BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata);
- }
- }
- return 0;
-}
\ No newline at end of file
diff --git a/benchmarks/bench_adsampling/pdx_ivf2_adsampling_u8.cpp b/benchmarks/bench_adsampling/pdx_ivf2_adsampling_u8.cpp
deleted file mode 100644
index dad758d..0000000
--- a/benchmarks/bench_adsampling/pdx_ivf2_adsampling_u8.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-#ifndef BENCHMARK_TIME
-#define BENCHMARK_TIME = true
-#endif
-
-#ifndef PDX_USE_EXPLICIT_SIMD
-#define PDX_USE_EXPLICIT_SIMD = true
-#endif
-
-#include
-#include
-#include "utils/file_reader.hpp"
-#include "index_base/pdx_ivf2.hpp"
-#include "pruners/adsampling.hpp"
-#include "pdxearch.hpp"
-#include "utils/benchmark_utils.hpp"
-
-int main(int argc, char *argv[]) {
- std::string arg_dataset;
- size_t arg_ivf_nprobe = 0;
- if (argc > 1){
- arg_dataset = argv[1];
- }
- if (argc > 2){
- arg_ivf_nprobe = atoi(argv[2]);
- }
- std::cout << "==> PDX IVF ADSampling\n";
-
- std::string ALGORITHM = "adsampling";
- const bool VERIFY_RESULTS = BenchmarkUtils::VERIFY_RESULTS;
-
- uint8_t KNN = BenchmarkUtils::KNN;
- float EPSILON0 = BenchmarkUtils::EPSILON0;
- size_t NUM_QUERIES;
- size_t NUM_MEASURE_RUNS = BenchmarkUtils::NUM_MEASURE_RUNS;
-
- PDX::DimensionsOrder DIMENSION_ORDER = PDX::SEQUENTIAL;
-
- std::string RESULTS_PATH;
- RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "U8_IVF2_PDX_ADSAMPLING.csv";
-
-
- for (const auto & dataset : BenchmarkUtils::DATASETS) {
- if (arg_dataset.size() > 0 && arg_dataset != dataset){
- continue;
- }
- PDX::IndexPDXIVF2 pdx_data = PDX::IndexPDXIVF2();
- pdx_data.Restore(BenchmarkUtils::PDX_ADSAMPLING_DATA + dataset + "-ivf2-u8");
- std::unique_ptr _matrix_ptr = MmapFile(BenchmarkUtils::NARY_ADSAMPLING_DATA + dataset + "-ivf2-u8-matrix");
- auto *_matrix = reinterpret_cast(_matrix_ptr.get());
-
- std::unique_ptr query_ptr = MmapFile(BenchmarkUtils::QUERIES_DATA + dataset);
- auto *query = reinterpret_cast(query_ptr.get());
- NUM_QUERIES = 1000;
-
- std::unique_ptr ground_truth = MmapFile(BenchmarkUtils::GROUND_TRUTH_DATA + dataset + "_100_norm");
- auto *int_ground_truth = reinterpret_cast(ground_truth.get());
- query += 1; // skip number of embeddings
-
- PDX::ADSamplingPruner pruner = PDX::ADSamplingPruner(pdx_data.num_dimensions, EPSILON0, _matrix);
- PDX::PDXearch searcher = PDX::PDXearch>(pdx_data, pruner, 1, DIMENSION_ORDER);
-
- std::vector nprobes_to_use;
- if (arg_ivf_nprobe > 0) {
- nprobes_to_use = {arg_ivf_nprobe};
- } else {
- nprobes_to_use.assign(std::begin(BenchmarkUtils::IVF_PROBES), std::end(BenchmarkUtils::IVF_PROBES));
- }
-
- for (size_t ivf_nprobe : nprobes_to_use) {
- if (pdx_data.num_clusters < ivf_nprobe){
- continue;
- }
- if (arg_ivf_nprobe > 0 && ivf_nprobe != arg_ivf_nprobe){
- continue;
- }
- std::vector runtimes;
- runtimes.resize(NUM_MEASURE_RUNS * NUM_QUERIES);
- searcher.SetNProbe(ivf_nprobe);
-
- float recalls = 0;
- if (VERIFY_RESULTS) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- auto result = searcher.Search(query + l * pdx_data.num_dimensions, KNN);
- BenchmarkUtils::VerifyResult(recalls, result, KNN, int_ground_truth, l);
- }
- }
- for (size_t j = 0; j < NUM_MEASURE_RUNS; ++j) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- searcher.Search(query + l * pdx_data.num_dimensions, KNN);
- runtimes[j + l * NUM_MEASURE_RUNS] = {
- searcher.end_to_end_clock.accum_time
- };
- }
- }
- float real_selectivity = 1 - BenchmarkUtils::SELECTIVITY_THRESHOLD;
- BenchmarkMetadata results_metadata = {
- dataset,
- ALGORITHM,
- NUM_MEASURE_RUNS,
- NUM_QUERIES,
- ivf_nprobe,
- KNN,
- recalls,
- real_selectivity
- };
- BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata);
- }
- }
- return 0;
-}
\ No newline at end of file
diff --git a/benchmarks/bench_adsampling/pdx_ivf2_adsampling_u8_filtered.cpp b/benchmarks/bench_adsampling/pdx_ivf2_adsampling_u8_filtered.cpp
deleted file mode 100644
index e65f246..0000000
--- a/benchmarks/bench_adsampling/pdx_ivf2_adsampling_u8_filtered.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-#ifndef BENCHMARK_TIME
-#define BENCHMARK_TIME = true
-#endif
-
-#ifndef PDX_USE_EXPLICIT_SIMD
-#define PDX_USE_EXPLICIT_SIMD = true
-#endif
-
-#include
-#include
-#include "utils/file_reader.hpp"
-#include "index_base/pdx_ivf2.hpp"
-#include "pruners/adsampling.hpp"
-#include "pdxearch.hpp"
-#include "db_mock/predicate_evaluator.hpp"
-#include "utils/benchmark_utils.hpp"
-
-int main(int argc, char *argv[]) {
- std::string arg_dataset;
- std::string arg_selectivity;
- size_t arg_ivf_nprobe = 0;
- if (argc > 1){
- arg_dataset = argv[1];
- }
- if (argc > 2){
- arg_ivf_nprobe = atoi(argv[2]);
- }
- if (argc > 3){
- arg_selectivity = argv[3];
- } else {
- arg_selectivity = "0_99";
- }
- std::cout << "==> PDX IVF ADSampling\n";
-
- std::string ALGORITHM = "adsampling";
- const bool VERIFY_RESULTS = BenchmarkUtils::VERIFY_RESULTS;
-
- uint8_t KNN = BenchmarkUtils::KNN;
- float EPSILON0 = BenchmarkUtils::EPSILON0;
- size_t NUM_QUERIES;
- size_t NUM_MEASURE_RUNS = BenchmarkUtils::NUM_MEASURE_RUNS;
-
- PDX::DimensionsOrder DIMENSION_ORDER = PDX::SEQUENTIAL;
-
- std::string RESULTS_PATH;
- RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "U8_IMI_PDX_ADSAMPLING_FILTERED.csv";
-
- std::cout << "==> SELECTIVITY: " << arg_selectivity << std::endl;
- for (const auto & dataset : BenchmarkUtils::DATASETS) {
- if (arg_dataset.size() > 0 && arg_dataset != dataset){
- continue;
- }
- PDX::IndexPDXIVF2 pdx_data = PDX::IndexPDXIVF2();
- pdx_data.Restore(BenchmarkUtils::PDX_ADSAMPLING_DATA + dataset + "-ivf2-u8");
- std::unique_ptr _matrix_ptr = MmapFile(BenchmarkUtils::NARY_ADSAMPLING_DATA + dataset + "-ivf2-u8-matrix");
- auto *_matrix = reinterpret_cast(_matrix_ptr.get());
-
- std::unique_ptr query_ptr = MmapFile(BenchmarkUtils::QUERIES_DATA + dataset);
- auto *query = reinterpret_cast(query_ptr.get());
-
- NUM_QUERIES = 1000;
- std::unique_ptr ground_truth = MmapFile(BenchmarkUtils::FILTERED_GROUND_TRUTH_DATA + dataset + "_100_norm_" + arg_selectivity);
- auto *int_ground_truth = reinterpret_cast(ground_truth.get());
- query += 1; // skip number of embeddings
-
- PDX::PredicateEvaluator predicate_evaluator = PDX::PredicateEvaluator(pdx_data.num_clusters);
- predicate_evaluator.LoadSelectionVectorFromFile(BenchmarkUtils::SELECTION_VECTOR_DATA + dataset + "_" + arg_selectivity + ".bin");
- PDX::ADSamplingPruner pruner = PDX::ADSamplingPruner(pdx_data.num_dimensions, EPSILON0, _matrix);
- PDX::PDXearch searcher = PDX::PDXearch>(pdx_data, pruner, 1, DIMENSION_ORDER);
-
- std::vector nprobes_to_use;
- if (arg_ivf_nprobe > 0) {
- nprobes_to_use = {arg_ivf_nprobe};
- } else {
- nprobes_to_use.assign(std::begin(BenchmarkUtils::IVF_PROBES), std::end(BenchmarkUtils::IVF_PROBES));
- }
-
- for (size_t ivf_nprobe : nprobes_to_use) {
- if (pdx_data.num_clusters < ivf_nprobe){
- continue;
- }
- if (arg_ivf_nprobe > 0 && ivf_nprobe != arg_ivf_nprobe){
- continue;
- }
- std::vector runtimes;
- runtimes.resize(NUM_MEASURE_RUNS * NUM_QUERIES);
- searcher.SetNProbe(ivf_nprobe);
-
- float recalls = 0;
- if (VERIFY_RESULTS) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- auto result = searcher.FilteredSearch(query + l * pdx_data.num_dimensions, KNN, predicate_evaluator);
- BenchmarkUtils::VerifyResult(recalls, result, KNN, int_ground_truth, l);
- }
- }
- for (size_t j = 0; j < NUM_MEASURE_RUNS; ++j) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- searcher.FilteredSearch(query + l * pdx_data.num_dimensions, KNN, predicate_evaluator);
- runtimes[j + l * NUM_MEASURE_RUNS] = {
- searcher.end_to_end_clock.accum_time
- };
- }
- }
- float real_selectivity = 1 - BenchmarkUtils::SELECTIVITY_THRESHOLD;
- BenchmarkMetadata results_metadata = {
- dataset,
- ALGORITHM,
- NUM_MEASURE_RUNS,
- NUM_QUERIES,
- ivf_nprobe,
- KNN,
- recalls,
- real_selectivity
- };
- BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata);
- }
- }
- return 0;
-}
\ No newline at end of file
diff --git a/benchmarks/bench_adsampling/pdx_ivf_adsampling.cpp b/benchmarks/bench_adsampling/pdx_ivf_adsampling.cpp
deleted file mode 100644
index 0b62a05..0000000
--- a/benchmarks/bench_adsampling/pdx_ivf_adsampling.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-#ifndef BENCHMARK_TIME
-#define BENCHMARK_TIME = true
-#endif
-
-#ifndef PDX_USE_EXPLICIT_SIMD
-#define PDX_USE_EXPLICIT_SIMD = true
-#endif
-
-#include
-#include
-#include "utils/file_reader.hpp"
-#include "index_base/pdx_ivf.hpp"
-#include "pdxearch.hpp"
-#include "pruners/adsampling.hpp"
-#include "utils/benchmark_utils.hpp"
-
-int main(int argc, char *argv[]) {
- std::string arg_dataset;
- size_t arg_ivf_nprobe = 0;
- if (argc > 1){
- arg_dataset = argv[1];
- }
- if (argc > 2){
- arg_ivf_nprobe = atoi(argv[2]);
- }
- std::cout << "==> PDX IVF ADSampling\n";
-
- std::string ALGORITHM = "adsampling";
- const bool VERIFY_RESULTS = BenchmarkUtils::VERIFY_RESULTS;
-
- uint8_t KNN = BenchmarkUtils::KNN;
- float SELECTIVITY_THRESHOLD = BenchmarkUtils::SELECTIVITY_THRESHOLD;
- float EPSILON0 = BenchmarkUtils::EPSILON0;
- size_t NUM_QUERIES;
- size_t NUM_MEASURE_RUNS = BenchmarkUtils::NUM_MEASURE_RUNS;
-
- PDX::DimensionsOrder DIMENSION_ORDER = PDX::SEQUENTIAL;
-
- std::string RESULTS_PATH;
- RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "IVF_PDX_ADSAMPLING.csv";
-
-
- for (const auto & dataset : BenchmarkUtils::DATASETS) {
- if (arg_dataset.size() > 0 && arg_dataset != dataset){
- continue;
- }
- PDX::IndexPDXIVF pdx_data = PDX::IndexPDXIVF();
- pdx_data.Restore(BenchmarkUtils::PDX_ADSAMPLING_DATA + dataset + "-ivf");
-
- std::unique_ptr _matrix_ptr = MmapFile(BenchmarkUtils::NARY_ADSAMPLING_DATA + dataset + "-matrix");
- auto *_matrix = reinterpret_cast(_matrix_ptr.get());
-
- std::unique_ptr query_ptr = MmapFile(BenchmarkUtils::QUERIES_DATA + dataset);
- auto *query = reinterpret_cast(query_ptr.get());
-
- NUM_QUERIES = 1000;
- std::unique_ptr ground_truth = MmapFile(BenchmarkUtils::GROUND_TRUTH_DATA + dataset + "_100_norm");
- auto *int_ground_truth = reinterpret_cast(ground_truth.get());
- query += 1; // skip number of embeddings
-
- PDX::ADSamplingPruner pruner = PDX::ADSamplingPruner(pdx_data.num_dimensions, EPSILON0, _matrix);
- PDX::PDXearch searcher = PDX::PDXearch(pdx_data, pruner, 1, DIMENSION_ORDER);
-
- std::vector nprobes_to_use;
- if (arg_ivf_nprobe > 0) {
- nprobes_to_use = {arg_ivf_nprobe};
- } else {
- nprobes_to_use.assign(std::begin(BenchmarkUtils::IVF_PROBES), std::end(BenchmarkUtils::IVF_PROBES));
- }
-
- for (size_t ivf_nprobe : nprobes_to_use) {
- if (pdx_data.num_clusters < ivf_nprobe){
- continue;
- }
- if (arg_ivf_nprobe > 0 && ivf_nprobe != arg_ivf_nprobe){
- continue;
- }
- std::vector runtimes;
- runtimes.resize(NUM_MEASURE_RUNS * NUM_QUERIES);
- searcher.SetNProbe(ivf_nprobe);
-
- float recalls = 0;
- if (VERIFY_RESULTS) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- auto result = searcher.Search(query + l * pdx_data.num_dimensions, KNN);
- BenchmarkUtils::VerifyResult(recalls, result, KNN, int_ground_truth, l);
- }
- }
- for (size_t j = 0; j < NUM_MEASURE_RUNS; ++j) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- searcher.Search(query + l * pdx_data.num_dimensions, KNN);
- runtimes[j + l * NUM_MEASURE_RUNS] = {
- searcher.end_to_end_clock.accum_time
- };
- }
- }
- float real_selectivity = 1 - SELECTIVITY_THRESHOLD;
- BenchmarkMetadata results_metadata = {
- dataset,
- ALGORITHM,
- NUM_MEASURE_RUNS,
- NUM_QUERIES,
- ivf_nprobe,
- KNN,
- recalls,
- real_selectivity
- };
- BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata);
- }
- }
- return 0;
-}
\ No newline at end of file
diff --git a/benchmarks/bench_adsampling/pdx_ivf_adsampling_filtered.cpp b/benchmarks/bench_adsampling/pdx_ivf_adsampling_filtered.cpp
deleted file mode 100644
index b04401c..0000000
--- a/benchmarks/bench_adsampling/pdx_ivf_adsampling_filtered.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-#ifndef BENCHMARK_TIME
-#define BENCHMARK_TIME = true
-#endif
-
-#ifndef PDX_USE_EXPLICIT_SIMD
-#define PDX_USE_EXPLICIT_SIMD = true
-#endif
-
-#include
-#include
-#include "utils/file_reader.hpp"
-#include "index_base/pdx_ivf.hpp"
-#include "pdxearch.hpp"
-#include "pruners/adsampling.hpp"
-#include "utils/benchmark_utils.hpp"
-
-int main(int argc, char *argv[]) {
- std::string arg_dataset;
- std::string arg_selectivity;
- size_t arg_ivf_nprobe = 0;
- if (argc > 1){
- arg_dataset = argv[1];
- }
- if (argc > 2){
- arg_ivf_nprobe = atoi(argv[2]);
- }
- if (argc > 3){
- arg_selectivity = argv[3];
- } else {
- arg_selectivity = "0_99";
- }
- std::cout << "==> PDX IVF ADSampling\n";
-
- std::string ALGORITHM = "adsampling";
- const bool VERIFY_RESULTS = BenchmarkUtils::VERIFY_RESULTS;
-
- uint8_t KNN = BenchmarkUtils::KNN;
- float SELECTIVITY_THRESHOLD = BenchmarkUtils::SELECTIVITY_THRESHOLD;
- float EPSILON0 = BenchmarkUtils::EPSILON0;
- size_t NUM_QUERIES;
- size_t NUM_MEASURE_RUNS = BenchmarkUtils::NUM_MEASURE_RUNS;
-
- PDX::DimensionsOrder DIMENSION_ORDER = PDX::SEQUENTIAL;
-
- std::string RESULTS_PATH;
- RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "IVF_PDX_ADSAMPLING_FILTERED.csv";
- std::cout << "==> SELECTIVITY: " << arg_selectivity << std::endl;
- for (const auto & dataset : BenchmarkUtils::DATASETS) {
- if (arg_dataset.size() > 0 && arg_dataset != dataset){
- continue;
- }
- PDX::IndexPDXIVF pdx_data = PDX::IndexPDXIVF();
- pdx_data.Restore(BenchmarkUtils::PDX_ADSAMPLING_DATA + dataset + "-ivf");
-
- std::unique_ptr _matrix_ptr = MmapFile(BenchmarkUtils::NARY_ADSAMPLING_DATA + dataset + "-matrix");
- auto *_matrix = reinterpret_cast(_matrix_ptr.get());
-
- std::unique_ptr query_ptr = MmapFile(BenchmarkUtils::QUERIES_DATA + dataset);
- auto *query = reinterpret_cast(query_ptr.get());
- NUM_QUERIES = 1000;
-
- std::unique_ptr ground_truth = MmapFile(BenchmarkUtils::FILTERED_GROUND_TRUTH_DATA + dataset + "_100_norm_" + arg_selectivity);
- auto *int_ground_truth = reinterpret_cast(ground_truth.get());
- query += 1; // skip number of embeddings
-
- PDX::PredicateEvaluator predicate_evaluator = PDX::PredicateEvaluator(pdx_data.num_clusters);
- predicate_evaluator.LoadSelectionVectorFromFile(BenchmarkUtils::SELECTION_VECTOR_DATA + dataset + "_" + arg_selectivity + ".bin");
- PDX::ADSamplingPruner pruner = PDX::ADSamplingPruner(pdx_data.num_dimensions, EPSILON0, _matrix);
- PDX::PDXearch searcher = PDX::PDXearch(pdx_data, pruner, 1, DIMENSION_ORDER);
-
- std::vector nprobes_to_use;
- if (arg_ivf_nprobe > 0) {
- nprobes_to_use = {arg_ivf_nprobe};
- } else {
- nprobes_to_use.assign(std::begin(BenchmarkUtils::IVF_PROBES), std::end(BenchmarkUtils::IVF_PROBES));
- }
-
- for (size_t ivf_nprobe : nprobes_to_use) {
- if (pdx_data.num_clusters < ivf_nprobe){
- continue;
- }
- if (arg_ivf_nprobe > 0 && ivf_nprobe != arg_ivf_nprobe){
- continue;
- }
- std::vector runtimes;
- runtimes.resize(NUM_MEASURE_RUNS * NUM_QUERIES);
- searcher.SetNProbe(ivf_nprobe);
-
- float recalls = 0;
- if (VERIFY_RESULTS) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- auto result = searcher.FilteredSearch(query + l * pdx_data.num_dimensions, KNN, predicate_evaluator);
- BenchmarkUtils::VerifyResult(recalls, result, KNN, int_ground_truth, l);
- }
- }
- for (size_t j = 0; j < NUM_MEASURE_RUNS; ++j) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- searcher.FilteredSearch(query + l * pdx_data.num_dimensions, KNN, predicate_evaluator);
- runtimes[j + l * NUM_MEASURE_RUNS] = {
- searcher.end_to_end_clock.accum_time
- };
- }
- }
- float real_selectivity = 1 - SELECTIVITY_THRESHOLD;
- BenchmarkMetadata results_metadata = {
- dataset,
- ALGORITHM,
- NUM_MEASURE_RUNS,
- NUM_QUERIES,
- ivf_nprobe,
- KNN,
- recalls,
- real_selectivity
- };
- BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata);
- }
- }
- return 0;
-}
\ No newline at end of file
diff --git a/benchmarks/bench_adsampling/pdx_ivf_adsampling_u8.cpp b/benchmarks/bench_adsampling/pdx_ivf_adsampling_u8.cpp
deleted file mode 100644
index c8c779f..0000000
--- a/benchmarks/bench_adsampling/pdx_ivf_adsampling_u8.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-#ifndef BENCHMARK_TIME
-#define BENCHMARK_TIME = true
-#endif
-
-#ifndef PDX_USE_EXPLICIT_SIMD
-#define PDX_USE_EXPLICIT_SIMD = true
-#endif
-
-#include
-#include
-#include "utils/file_reader.hpp"
-#include "index_base/pdx_ivf.hpp"
-#include "pdxearch.hpp"
-#include "pruners/adsampling.hpp"
-#include "utils/benchmark_utils.hpp"
-
-int main(int argc, char *argv[]) {
- std::string arg_dataset;
- size_t arg_ivf_nprobe = 0;
- if (argc > 1){
- arg_dataset = argv[1];
- }
- if (argc > 2){
- arg_ivf_nprobe = atoi(argv[2]);
- }
- std::cout << "==> PDX IVF ADSampling\n";
-
- std::string ALGORITHM = "adsampling";
- const bool VERIFY_RESULTS = BenchmarkUtils::VERIFY_RESULTS;
-
- uint8_t KNN = BenchmarkUtils::KNN;
- float EPSILON0 = BenchmarkUtils::EPSILON0;
- size_t NUM_QUERIES;
- size_t NUM_MEASURE_RUNS = BenchmarkUtils::NUM_MEASURE_RUNS;
-
- PDX::DimensionsOrder DIMENSION_ORDER = PDX::SEQUENTIAL;
-
- std::string RESULTS_PATH;
- RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "U8_IVF_PDX_ADSAMPLING.csv";
-
-
- for (const auto & dataset : BenchmarkUtils::DATASETS) {
- if (arg_dataset.size() > 0 && arg_dataset != dataset){
- continue;
- }
- PDX::IndexPDXIVF pdx_data = PDX::IndexPDXIVF();
- pdx_data.Restore(BenchmarkUtils::PDX_ADSAMPLING_DATA + dataset + "-ivf-u8");
- std::unique_ptr _matrix_ptr = MmapFile(BenchmarkUtils::NARY_ADSAMPLING_DATA + dataset + "-ivf-u8-matrix");
- auto *_matrix = reinterpret_cast(_matrix_ptr.get());
-
- std::unique_ptr query_ptr = MmapFile(BenchmarkUtils::QUERIES_DATA + dataset);
- auto *query = reinterpret_cast(query_ptr.get());
-
- NUM_QUERIES = 1000;
- std::unique_ptr ground_truth = MmapFile(BenchmarkUtils::GROUND_TRUTH_DATA + dataset + "_100_norm");
- auto *int_ground_truth = reinterpret_cast(ground_truth.get());
- query += 1; // skip number of embeddings
-
- PDX::ADSamplingPruner pruner = PDX::ADSamplingPruner(pdx_data.num_dimensions, EPSILON0, _matrix);
- PDX::PDXearch searcher = PDX::PDXearch(pdx_data, pruner, 1, DIMENSION_ORDER);
-
- std::vector nprobes_to_use;
- if (arg_ivf_nprobe > 0) {
- nprobes_to_use = {arg_ivf_nprobe};
- } else {
- nprobes_to_use.assign(std::begin(BenchmarkUtils::IVF_PROBES), std::end(BenchmarkUtils::IVF_PROBES));
- }
-
- for (size_t ivf_nprobe : nprobes_to_use) {
- if (pdx_data.num_clusters < ivf_nprobe){
- continue;
- }
- if (arg_ivf_nprobe > 0 && ivf_nprobe != arg_ivf_nprobe){
- continue;
- }
- std::vector runtimes;
- runtimes.resize(NUM_MEASURE_RUNS * NUM_QUERIES);
- searcher.SetNProbe(ivf_nprobe);
-
- float recalls = 0;
- if (VERIFY_RESULTS) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- auto result = searcher.Search(query + l * pdx_data.num_dimensions, KNN);
- BenchmarkUtils::VerifyResult(recalls, result, KNN, int_ground_truth, l);
- }
- }
- for (size_t j = 0; j < NUM_MEASURE_RUNS; ++j) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- searcher.Search(query + l * pdx_data.num_dimensions, KNN);
- runtimes[j + l * NUM_MEASURE_RUNS] = {
- searcher.end_to_end_clock.accum_time
- };
- }
- }
- float real_selectivity = 1 - BenchmarkUtils::SELECTIVITY_THRESHOLD;
- BenchmarkMetadata results_metadata = {
- dataset,
- ALGORITHM,
- NUM_MEASURE_RUNS,
- NUM_QUERIES,
- ivf_nprobe,
- KNN,
- recalls,
- real_selectivity
- };
- BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata);
- }
- }
- return 0;
-}
\ No newline at end of file
diff --git a/benchmarks/bench_bond/pdx_bond.cpp b/benchmarks/bench_bond/pdx_bond.cpp
deleted file mode 100644
index 97330d9..0000000
--- a/benchmarks/bench_bond/pdx_bond.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-#ifndef BENCHMARK_TIME
-#define BENCHMARK_TIME = true
-#endif
-
-#ifndef PDX_USE_EXPLICIT_SIMD
-#define PDX_USE_EXPLICIT_SIMD = true
-#endif
-
-#include
-#include
-#include "utils/file_reader.hpp"
-#include "index_base/pdx_ivf.hpp"
-#include "pruners/bond.hpp"
-#include "pdxearch.hpp"
-#include "utils/benchmark_utils.hpp"
-
-int main(int argc, char *argv[]) {
- std::string arg_dataset;
- PDX::DimensionsOrder DIMENSION_ORDER = PDX::DISTANCE_TO_MEANS_IMPROVED;
- DIMENSION_ORDER = PDX::DISTANCE_TO_MEANS_IMPROVED;
- std::string ALGORITHM = "pdx-bond";
- if (argc > 1){
- arg_dataset = argv[1];
- }
- if (argc > 2){
- DIMENSION_ORDER = static_cast(atoi(argv[2]));
- if (DIMENSION_ORDER == PDX::DISTANCE_TO_MEANS_IMPROVED){
- ALGORITHM = "pdx-bond";
- }
- else if (DIMENSION_ORDER == PDX::DISTANCE_TO_MEANS){
- ALGORITHM = "pdx-bond-dtm";
- }
- else if (DIMENSION_ORDER == PDX::DECREASING_IMPROVED){
- ALGORITHM = "pdx-bond-dec";
- }
- else if (DIMENSION_ORDER == PDX::DECREASING){
- ALGORITHM = "pdx-bond-dec";
- }
- else if (DIMENSION_ORDER == PDX::SEQUENTIAL){
- ALGORITHM = "pdx-bond-sec";
- }
- else if (DIMENSION_ORDER == PDX::DIMENSION_ZONES){
- ALGORITHM = "pdx-bond-dz";
- }
- }
- std::cout << "==> PDX BOND EXACT\n";
-
- const bool VERIFY_RESULTS = BenchmarkUtils::VERIFY_RESULTS;
-
- uint8_t KNN = BenchmarkUtils::KNN;
- float SELECTIVITY_THRESHOLD = BenchmarkUtils::SELECTIVITY_THRESHOLD;
- size_t NUM_QUERIES;
- size_t NUM_MEASURE_RUNS = BenchmarkUtils::NUM_MEASURE_RUNS;
-
- std::string RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "EXACT_PDX_BOND.csv";
-
- for (const auto & dataset : BenchmarkUtils::DATASETS) {
- if (arg_dataset.size() > 0 && arg_dataset != dataset){
- continue;
- }
- PDX::IndexPDXIVF pdx_data = PDX::IndexPDXIVF();
-
- pdx_data.Restore(BenchmarkUtils::PDX_DATA + dataset + "-flat");
- std::unique_ptr query_ptr = MmapFile(BenchmarkUtils::QUERIES_DATA + dataset);
- auto *query = reinterpret_cast(query_ptr.get());
-
- NUM_QUERIES = 1000;
- std::unique_ptr ground_truth = MmapFile(BenchmarkUtils::GROUND_TRUTH_DATA + dataset + "_100_norm");
- auto *int_ground_truth = reinterpret_cast(ground_truth.get());
- query += 1; // skip number of embeddings
-
- PDX::IndexPDXIVF nary_data = PDX::IndexPDXIVF();
-
- std::vector runtimes;
- runtimes.resize(NUM_MEASURE_RUNS * NUM_QUERIES);
-
- auto pruner = PDX::BondPruner(pdx_data.num_dimensions);
- PDX::PDXearch searcher = PDX::PDXearch(pdx_data, pruner, 0, DIMENSION_ORDER);
-
- float recalls = 0;
- if (VERIFY_RESULTS){
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- auto result = searcher.Search(query + l * pdx_data.num_dimensions, KNN);
- BenchmarkUtils::VerifyResult(recalls, result, KNN, int_ground_truth, l);
- }
- }
- for (size_t j = 0; j < NUM_MEASURE_RUNS; ++j) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- searcher.Search(query + l * pdx_data.num_dimensions, KNN);
- runtimes[j + l * NUM_MEASURE_RUNS] = {
- searcher.end_to_end_clock.accum_time
- };
- }
- }
- float real_selectivity = 1 - SELECTIVITY_THRESHOLD;
- BenchmarkMetadata results_metadata = {
- dataset,
- ALGORITHM,
- NUM_MEASURE_RUNS,
- NUM_QUERIES,
- 0,
- KNN,
- recalls,
- real_selectivity,
- };
- BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata);
- }
-}
diff --git a/benchmarks/bench_bond/pdx_bond_ivf.cpp b/benchmarks/bench_bond/pdx_bond_ivf.cpp
deleted file mode 100644
index 53c87ad..0000000
--- a/benchmarks/bench_bond/pdx_bond_ivf.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-#ifndef BENCHMARK_TIME
-#define BENCHMARK_TIME = true
-#endif
-
-#ifndef PDX_USE_EXPLICIT_SIMD
-#define PDX_USE_EXPLICIT_SIMD = true
-#endif
-
-#include
-#include
-#include "utils/file_reader.hpp"
-#include "index_base/pdx_ivf.hpp"
-#include "pruners/bond.hpp"
-#include "pdxearch.hpp"
-#include "utils/benchmark_utils.hpp"
-
-int main(int argc, char *argv[]) {
- std::string arg_dataset;
- size_t arg_ivf_nprobe = 0;
- std::string ALGORITHM = "pdx-bond";
- PDX::DimensionsOrder DIMENSION_ORDER = PDX::SEQUENTIAL;
- DIMENSION_ORDER = PDX::DIMENSION_ZONES;
- if (argc > 1){
- arg_dataset = argv[1];
- }
- if (argc > 2){
- arg_ivf_nprobe = atoi(argv[2]);
- }
- if (argc > 3){
- // enum PDXearchDimensionsOrder {
- // SEQUENTIAL,
- // DISTANCE_TO_MEANS,
- // DECREASING,
- // DISTANCE_TO_MEANS_IMPROVED,
- // DECREASING_IMPROVED,
- // DIMENSION_ZONES
- // };
- DIMENSION_ORDER = static_cast(atoi(argv[3]));
- ALGORITHM = "pdx-bond";
- if (DIMENSION_ORDER == PDX::DISTANCE_TO_MEANS){
- ALGORITHM = "pdx-bond-dtm";
- }
- else if (DIMENSION_ORDER == PDX::DIMENSION_ZONES){
- ALGORITHM = "pdx-bond-dz";
- }
-
- }
- std::cout << "==> PDX IVF BOND\n";
-
- const bool VERIFY_RESULTS = BenchmarkUtils::VERIFY_RESULTS;
-
- uint8_t KNN = BenchmarkUtils::KNN;
- float SELECTIVITY_THRESHOLD = BenchmarkUtils::SELECTIVITY_THRESHOLD;
- size_t NUM_QUERIES;
- size_t NUM_MEASURE_RUNS = BenchmarkUtils::NUM_MEASURE_RUNS;
-
- std::string RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "IVF_PDX_BOND.csv";
-
-
- for (const auto & dataset : BenchmarkUtils::DATASETS) {
- if (arg_dataset.size() > 0 && arg_dataset != dataset){
- continue;
- }
- PDX::IndexPDXIVF pdx_data = PDX::IndexPDXIVF();
-
- pdx_data.Restore(BenchmarkUtils::PDX_DATA + dataset + "-ivf");
-
- std::unique_ptr query_ptr = MmapFile(BenchmarkUtils::QUERIES_DATA + dataset);
- auto *query = reinterpret_cast(query_ptr.get());
-
- NUM_QUERIES = 1000;
- std::unique_ptr ground_truth = MmapFile(BenchmarkUtils::GROUND_TRUTH_DATA + dataset + "_100_norm");
- auto *int_ground_truth = reinterpret_cast(ground_truth.get());
- query += 1; // skip number of embeddings
-
- auto pruner = PDX::BondPruner(pdx_data.num_dimensions);
- PDX::PDXearch searcher = PDX::PDXearch(pdx_data, pruner, 0, DIMENSION_ORDER);
-
- std::vector nprobes_to_use;
- if (arg_ivf_nprobe > 0) {
- nprobes_to_use = {arg_ivf_nprobe};
- } else {
- nprobes_to_use.assign(std::begin(BenchmarkUtils::IVF_PROBES), std::end(BenchmarkUtils::IVF_PROBES));
- }
-
- for (size_t ivf_nprobe : nprobes_to_use) {
- if (pdx_data.num_clusters < ivf_nprobe) {
- continue;
- }
- if (arg_ivf_nprobe > 0 && ivf_nprobe != arg_ivf_nprobe){
- continue;
- }
- std::vector runtimes;
- runtimes.resize(NUM_MEASURE_RUNS * NUM_QUERIES);
- searcher.SetNProbe(ivf_nprobe);
-
- float recalls = 0;
- if (VERIFY_RESULTS){
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- auto result = searcher.Search(query + l * pdx_data.num_dimensions, KNN);
- BenchmarkUtils::VerifyResult(recalls, result, KNN, int_ground_truth, l);
- }
- }
- for (size_t j = 0; j < NUM_MEASURE_RUNS; ++j) {
- for (size_t l = 0; l < NUM_QUERIES; ++l) {
- searcher.Search(query + l * pdx_data.num_dimensions, KNN);
- runtimes[j + l * NUM_MEASURE_RUNS] = {
- searcher.end_to_end_clock.accum_time
- };
- }
- }
- float real_selectivity = 1 - SELECTIVITY_THRESHOLD;
- BenchmarkMetadata results_metadata = {
- dataset,
- ALGORITHM,
- NUM_MEASURE_RUNS,
- NUM_QUERIES,
- ivf_nprobe,
- KNN,
- recalls,
- real_selectivity,
- };
- BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata);
- }
- }
-}
diff --git a/benchmarks/benchmark_utils.hpp b/benchmarks/benchmark_utils.hpp
new file mode 100644
index 0000000..345a8c9
--- /dev/null
+++ b/benchmarks/benchmark_utils.hpp
@@ -0,0 +1,371 @@
+#pragma once
+
+#include "pdx/common.hpp"
+#include "pdx/utils.hpp"
+#include <algorithm>
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <filesystem>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+class TicToc {
+ public:
+ size_t accum_time = 0;
+ std::chrono::high_resolution_clock::time_point start =
+ std::chrono::high_resolution_clock::now();
+
+ void Reset() {
+ accum_time = 0;
+ start = std::chrono::high_resolution_clock::now();
+ }
+
+ inline void Tic() { start = std::chrono::high_resolution_clock::now(); }
+
+ inline void Toc() {
+ auto end = std::chrono::high_resolution_clock::now();
+ accum_time += std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
+ }
+
+ double GetMilliseconds() const { return static_cast(accum_time) / 1e6; }
+};
+
+// Raw binary data paths (SuperKMeans convention: data_.bin / data__test.bin)
+inline std::string RAW_DATA_DIR = std::string{CMAKE_SOURCE_DIR} + "/../SuperKMeans/benchmarks/data";
+inline std::string GROUND_TRUTH_JSON_DIR =
+ std::string{CMAKE_SOURCE_DIR} + "/../SuperKMeans/benchmarks/ground_truth";
+
+struct RawDatasetInfo {
+ size_t num_embeddings;
+ size_t num_dimensions;
+ size_t num_queries;
+ PDX::DistanceMetric distance_metric;
+ std::string pdx_dataset_name; // Name used in PDX ground truth / query files
+};
+
+inline const std::unordered_map<std::string, RawDatasetInfo> RAW_DATASET_PARAMS = {
+ {"sift", {1000000, 128, 1000, PDX::DistanceMetric::L2SQ, "sift-128-euclidean"}},
+ {"yi", {187843, 128, 1000, PDX::DistanceMetric::IP, "yi-128-ip"}},
+ {"llama", {256921, 128, 1000, PDX::DistanceMetric::IP, "llama-128-ip"}},
+ {"glove200", {1183514, 200, 1000, PDX::DistanceMetric::COSINE, "glove-200-angular"}},
+ {"yandex", {1000000, 200, 1000, PDX::DistanceMetric::COSINE, "yandex-200-cosine"}},
+ {"yahoo", {677305, 384, 1000, PDX::DistanceMetric::COSINE, "yahoo-minilm-384-normalized"}},
+ {"clip", {1281167, 512, 1000, PDX::DistanceMetric::L2SQ, "imagenet-clip-512-normalized"}},
+ {"contriever", {990000, 768, 1000, PDX::DistanceMetric::L2SQ, "contriever-768"}},
+ {"gist", {1000000, 960, 1000, PDX::DistanceMetric::L2SQ, "gist-960-euclidean"}},
+ {"mxbai", {769382, 1024, 1000, PDX::DistanceMetric::L2SQ, "agnews-mxbai-1024-euclidean"}},
+ {"openai", {999000, 1536, 1000, PDX::DistanceMetric::L2SQ, "openai-1536-angular"}},
+ {"arxiv", {2253000, 768, 1000, PDX::DistanceMetric::L2SQ, "instructorxl-arxiv-768"}},
+ {"wiki", {260372, 3072, 1000, PDX::DistanceMetric::L2SQ, "simplewiki-openai-3072-normalized"}},
+ {"cohere", {10000000, 1024, 1000, PDX::DistanceMetric::L2SQ, "cohere"}},
+};
+
+struct BenchmarkMetadata {
+ std::string dataset;
+ std::string algorithm;
+ size_t num_measure_runs{0};
+ size_t num_queries{100};
+ size_t ivf_nprobe{0};
+ size_t knn{10};
+ float recalls{1.0};
+ float selectivity_threshold{0.0};
+ float epsilon{0.0};
+};
+
+struct PhasesRuntime {
+ size_t end_to_end{0};
+};
+
+class BenchmarkUtils {
+ public:
+ inline static std::string PDX_DATA =
+ std::string{CMAKE_SOURCE_DIR} + "/benchmarks/datasets/pdx/";
+ inline static std::string PDX_ADSAMPLING_DATA =
+ std::string{CMAKE_SOURCE_DIR} + "/benchmarks/datasets/adsampling_pdx/";
+ inline static std::string GROUND_TRUTH_DATA =
+ std::string{CMAKE_SOURCE_DIR} + "/benchmarks/datasets/ground_truth/";
+ inline static std::string FILTERED_GROUND_TRUTH_DATA =
+ std::string{CMAKE_SOURCE_DIR} + "/benchmarks/datasets/ground_truth_filtered/";
+ inline static std::string PURESCAN_DATA =
+ std::string{CMAKE_SOURCE_DIR} + "/benchmarks/datasets/purescan/";
+ inline static std::string QUERIES_DATA =
+ std::string{CMAKE_SOURCE_DIR} + "/benchmarks/datasets/queries/";
+ inline static std::string SELECTION_VECTOR_DATA =
+ std::string{CMAKE_SOURCE_DIR} + "/benchmarks/datasets/selection_vectors/";
+
+ std::string CPU_ARCHITECTURE = "DEFAULT";
+ std::string RESULTS_DIR_PATH =
+ std::string{CMAKE_SOURCE_DIR} + "/benchmarks/results/" + CPU_ARCHITECTURE + "/";
+
+ explicit BenchmarkUtils() {
+ CPU_ARCHITECTURE = std::getenv("PDX_ARCH") ? std::getenv("PDX_ARCH") : "DEFAULT";
+ RESULTS_DIR_PATH =
+ std::string{CMAKE_SOURCE_DIR} + "/benchmarks/results/" + CPU_ARCHITECTURE + "/";
+ }
+
+ inline static std::string DATASETS[] = {
+ "sift-128-euclidean",
+ "yi-128-ip",
+ "llama-128-ip",
+ "glove-200-angular",
+ "yandex-200-cosine",
+ "word2vec-300",
+ "yahoo-minilm-384-normalized",
+ "msong-420",
+ "imagenet-clip-512-normalized",
+ "laion-clip-512-normalized",
+ "imagenet-align-640-normalized",
+ "codesearchnet-jina-768-cosine",
+ "landmark-dino-768-cosine",
+ "landmark-nomic-768-normalized",
+ "arxiv-nomic-768-normalized",
+ "ccnews-nomic-768-normalized",
+ "coco-nomic-768-normalized",
+ "contriever-768",
+ "instructorxl-arxiv-768",
+ "gooaq-distilroberta-768-normalized",
+ "gist-960-euclidean",
+ "agnews-mxbai-1024-euclidean",
+ "cohere",
+ "openai-1536-angular",
+ "celeba-resnet-2048-cosine",
+ "simplewiki-openai-3072-normalized"
+ };
+
+ inline static std::string FILTERED_SELECTIVITIES[] = {
+ "0_000135",
+ "0_001",
+ "0_01",
+ "0_1",
+ "0_2",
+ "0_3",
+ "0_4",
+ "0_5",
+ "0_75",
+ "0_9",
+ "0_95",
+ "0_99",
+ "PART_1",
+ "PART_30",
+ "PART+_1",
+ };
+
+ inline static size_t IVF_PROBES[] = {
+ // 4000, 3980, 3967, 2048, 1024, 512, 256,224,192,160,144,128,
+ 2048, 1536, 1024, 512, 384, 256, 224, 192, 160, 144, 128, 112, 96, 80, 64, 56, 48,
+ 40, 32, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 1
+ };
+
+ inline static int POW_10[10] =
+ {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000};
+
+ inline static size_t IVF_PROBES_PHASES[] = {
+ 512,
+ 256,
+ 128,
+ 64,
+ 32,
+ 16,
+ 8,
+ 4,
+ 2,
+ };
+
+ inline static size_t NUM_MEASURE_RUNS = 1;
+ inline static float SELECTIVITY_THRESHOLD = 0.80; // more than 20% pruned to pass
+ inline static bool VERIFY_RESULTS = true;
+ inline static uint8_t KNN = 20;
+
+ inline static uint8_t GROUND_TRUTH_MAX_K =
+ 100; // To properly skip on the ground truth file (do not change)
+
+ template
+ static void VerifyResult(
+ float& recalls,
+ const std::vector& result,
+ size_t knn,
+ const uint32_t* int_ground_truth,
+ size_t n_query
+ ) {
+ std::unordered_set seen;
+ for (const auto& val : result) {
+ if (!seen.insert(val.index).second) {
+ throw std::runtime_error(
+ "Duplicates detected in the result set! This is likely a bug on PDXearch"
+ );
+ }
+ }
+ if (result.size() < knn) {
+ std::cerr << "WARNING: Result set is not complete! Set a higher `nbuckets` parameter "
+ "(Only got "
+ << result.size() << " results)" << std::endl;
+ }
+ if constexpr (MEASURE_RECALL) {
+ size_t true_positives = 0;
+ for (size_t j = 0; j < result.size(); ++j) {
+ for (size_t m = 0; m < knn; ++m) {
+ if (result[j].index == int_ground_truth[m + n_query * GROUND_TRUTH_MAX_K]) {
+ true_positives++;
+ break;
+ }
+ }
+ }
+ recalls += 1.0 * true_positives / knn;
+ } else {
+ for (size_t j = 0; j < knn; ++j) {
+ if (result[j].index != int_ground_truth[j + n_query * GROUND_TRUTH_MAX_K]) {
+ std::cout << "WRONG RESULT!\n";
+ break;
+ }
+ }
+ }
+ }
+
+ // We remove extreme outliers on both sides (Q3 + 1.5*IQR & Q1 - 1.5*IQR)
+ static void SaveResults(
+ std::vector runtimes,
+ const std::string& results_path,
+ const BenchmarkMetadata& metadata
+ ) {
+ bool write_header = true;
+ if (std::filesystem::exists(results_path)) {
+ write_header = false;
+ }
+ std::ofstream file{results_path, std::ios::app};
+ size_t min_runtime = std::numeric_limits<size_t>::max();
+ size_t max_runtime = std::numeric_limits<size_t>::min();
+ size_t sum_runtimes = 0;
+ size_t all_min_runtime = std::numeric_limits<size_t>::max();
+ size_t all_max_runtime = std::numeric_limits<size_t>::min();
+ size_t all_sum_runtimes = 0;
+ auto const Q1 = runtimes.size() / 4;
+ auto const Q2 = runtimes.size() / 2;
+ auto const Q3 = Q1 + Q2;
+ std::sort(runtimes.begin(), runtimes.end(), [](PhasesRuntime i1, PhasesRuntime i2) {
+ return i1.end_to_end < i2.end_to_end;
+ });
+ auto const iqr = runtimes[Q3].end_to_end - runtimes[Q1].end_to_end;
+ size_t accounted_queries = 0;
+ for (size_t j = 0; j < metadata.num_measure_runs * metadata.num_queries; ++j) {
+ all_min_runtime = std::min(all_min_runtime, runtimes[j].end_to_end);
+ all_max_runtime = std::max(all_max_runtime, runtimes[j].end_to_end);
+ all_sum_runtimes += runtimes[j].end_to_end;
+ // Removing outliers
+ if (runtimes[j].end_to_end > runtimes[Q3].end_to_end + 1.5 * iqr) {
+ continue;
+ }
+ if (runtimes[j].end_to_end < runtimes[Q1].end_to_end - 1.5 * iqr) {
+ continue;
+ }
+ min_runtime = std::min(min_runtime, runtimes[j].end_to_end);
+ max_runtime = std::max(max_runtime, runtimes[j].end_to_end);
+ sum_runtimes += runtimes[j].end_to_end;
+ accounted_queries += 1;
+ }
+ double all_min_runtime_ms = 1.0 * all_min_runtime / 1000000;
+ double all_max_runtime_ms = 1.0 * all_max_runtime / 1000000;
+ double all_avg_runtime_ms =
+ 1.0 * all_sum_runtimes / (1000000 * (metadata.num_measure_runs * metadata.num_queries));
+ double min_runtime_ms = 1.0 * min_runtime / 1000000;
+ double max_runtime_ms = 1.0 * max_runtime / 1000000;
+ double avg_runtime_ms = 1.0 * sum_runtimes / (1000000 * accounted_queries);
+ double avg_recall = metadata.recalls / metadata.num_queries;
+
+ std::cout << metadata.dataset << " --------------\n";
+ std::cout << "n_queries: " << metadata.num_queries << "\n";
+ if (metadata.ivf_nprobe > 0) {
+ std::cout << "nprobe: " << metadata.ivf_nprobe << "\n";
+ }
+ std::cout << "avg: " << std::setprecision(6) << avg_runtime_ms << "\n";
+ std::cout << "max: " << std::setprecision(6) << max_runtime_ms << "\n";
+ std::cout << "min: " << std::setprecision(6) << min_runtime_ms << "\n";
+ std::cout << "rec: " << std::setprecision(6) << avg_recall << "\n";
+
+ if (write_header) {
+ file << "dataset,algorithm,avg,max,min,recall,ivf_nprobe,epsilon,"
+ "knn,n_queries,selectivity,"
+ "num_measure_runs,avg_all,max_all,min_all"
+ << "\n";
+ }
+ file << metadata.dataset << "," << metadata.algorithm << "," << std::setprecision(6)
+ << avg_runtime_ms << "," << std::setprecision(6) << max_runtime_ms << ","
+ << std::setprecision(6) << min_runtime_ms << "," << avg_recall << ","
+ << metadata.ivf_nprobe << "," << metadata.epsilon << "," << +metadata.knn << ","
+ << metadata.num_queries << "," << std::setprecision(4)
+ << metadata.selectivity_threshold << "," << metadata.num_measure_runs << ","
+ << all_avg_runtime_ms << "," << all_max_runtime_ms << "," << all_min_runtime_ms
+ << "\n";
+ file.close();
+ }
+};
+
+inline BenchmarkUtils BENCHMARK_UTILS;
+
+inline std::unordered_map<int, std::vector<int>> ParseGroundTruthJson(const std::string& filename) {
+ std::unordered_map<int, std::vector<int>> gt_map;
+ std::ifstream file(filename);
+ if (!file.is_open())
+ return gt_map;
+
+ std::string line;
+ std::getline(file, line);
+
+ size_t pos = 0;
+ while ((pos = line.find("\"", pos)) != std::string::npos) {
+ size_t key_start = pos + 1;
+ size_t key_end = line.find("\"", key_start);
+ if (key_end == std::string::npos)
+ break;
+
+ int query_idx = std::stoi(line.substr(key_start, key_end - key_start));
+
+ size_t arr_start = line.find("[", key_end);
+ size_t arr_end = line.find("]", arr_start);
+ if (arr_start == std::string::npos || arr_end == std::string::npos)
+ break;
+
+ std::string arr_str = line.substr(arr_start + 1, arr_end - arr_start - 1);
+ std::vector<int> ids;
+ std::istringstream iss(arr_str);
+ std::string token;
+ while (std::getline(iss, token, ',')) {
+ token.erase(0, token.find_first_not_of(" \t"));
+ token.erase(token.find_last_not_of(" \t") + 1);
+ if (!token.empty())
+ ids.push_back(std::stoi(token));
+ }
+ gt_map[query_idx] = ids;
+ pos = arr_end + 1;
+ }
+ return gt_map;
+}
+
+inline float ComputeRecallFromJson(
+ const std::vector& result,
+ const std::vector<int>& gt_ids,
+ size_t knn
+) {
+ size_t hits = 0;
+ size_t gt_count = std::min(knn, gt_ids.size());
+ for (size_t i = 0; i < result.size(); i++) {
+ for (size_t j = 0; j < gt_count; j++) {
+ if (result[i].index == static_cast<uint32_t>(gt_ids[j])) {
+ hits++;
+ break;
+ }
+ }
+ }
+ return static_cast(hits) / static_cast(gt_count);
+}
diff --git a/benchmarks/bench_kernels/README.md b/benchmarks/kernels_playground/README.md
similarity index 100%
rename from benchmarks/bench_kernels/README.md
rename to benchmarks/kernels_playground/README.md
diff --git a/benchmarks/bench_kernels/kernels.cpp b/benchmarks/kernels_playground/kernels.cpp
similarity index 66%
rename from benchmarks/bench_kernels/kernels.cpp
rename to benchmarks/kernels_playground/kernels.cpp
index 49f1855..4e59f3d 100644
--- a/benchmarks/bench_kernels/kernels.cpp
+++ b/benchmarks/kernels_playground/kernels.cpp
@@ -1,9 +1,9 @@
-#include
-#include
-#include
-#include
#include
+#include
+#include
+#include
#include
+#include
#include
#if defined(__ARM_NEON)
@@ -14,47 +14,46 @@
#include
#endif
-
-template
+template
struct KNNCandidate {
uint32_t index;
float distance;
};
-template<>
+template <>
struct KNNCandidate {
uint32_t index;
float distance;
};
-template<>
+template <>
struct KNNCandidate {
uint32_t index;
uint32_t distance;
};
-template
+template
struct DistanceType {
using type = float; // default for f32
};
-template<>
+template <>
struct DistanceType {
using type = uint32_t;
};
-template
+template
using DistanceType_t = typename DistanceType::type;
-template
+template
struct VectorComparator {
- bool operator() (const KNNCandidate& a, const KNNCandidate& b) {
+ bool operator()(const KNNCandidate& a, const KNNCandidate& b) {
return a.distance < b.distance;
}
};
-template
+template
struct VectorComparatorInverse {
- bool operator() (const KNNCandidate& a, const KNNCandidate& b) {
+ bool operator()(const KNNCandidate& a, const KNNCandidate& b) {
return a.distance > b.distance;
}
};
@@ -84,12 +83,7 @@ static constexpr size_t U8_N_REGISTERS_AVX = 4;
// SIMD
////////////////
-
-inline float f32_simd_ip(
- const float *first_vector,
- const float *second_vector,
- const size_t d
-) {
+inline float f32_simd_ip(const float* first_vector, const float* second_vector, const size_t d) {
#if defined(__APPLE__)
float distance = 0.0;
#pragma clang loop vectorize(enable)
@@ -115,35 +109,32 @@ inline float f32_simd_ip(
__m512 a_vec, b_vec;
size_t num_dimensions = d;
- simsimd_ip_f32_skylake_cycle:
- if (num_dimensions < 16) {
- __mmask16 mask = (__mmask16)_bzhi_u32(0xFFFFFFFF, num_dimensions);
- a_vec = _mm512_maskz_loadu_ps(mask, first_vector);
- b_vec = _mm512_maskz_loadu_ps(mask, second_vector);
- num_dimensions = 0;
- } else {
- a_vec = _mm512_loadu_ps(first_vector);
- b_vec = _mm512_loadu_ps(second_vector);
- first_vector += 16, second_vector += 16, num_dimensions -= 16;
- }
+simsimd_ip_f32_skylake_cycle:
+ if (num_dimensions < 16) {
+ __mmask16 mask = (__mmask16) _bzhi_u32(0xFFFFFFFF, num_dimensions);
+ a_vec = _mm512_maskz_loadu_ps(mask, first_vector);
+ b_vec = _mm512_maskz_loadu_ps(mask, second_vector);
+ num_dimensions = 0;
+ } else {
+ a_vec = _mm512_loadu_ps(first_vector);
+ b_vec = _mm512_loadu_ps(second_vector);
+ first_vector += 16, second_vector += 16, num_dimensions -= 16;
+ }
d2_vec = _mm512_fmadd_ps(a_vec, b_vec, d2_vec);
if (num_dimensions)
goto simsimd_ip_f32_skylake_cycle;
// _simsimd_reduce_f32x16_skylake
__m512 x = _mm512_add_ps(d2_vec, _mm512_shuffle_f32x4(d2_vec, d2_vec, _MM_SHUFFLE(0, 0, 3, 2)));
- __m128 r = _mm512_castps512_ps128(_mm512_add_ps(x, _mm512_shuffle_f32x4(x, x, _MM_SHUFFLE(0, 0, 0, 1))));
+ __m128 r =
+ _mm512_castps512_ps128(_mm512_add_ps(x, _mm512_shuffle_f32x4(x, x, _MM_SHUFFLE(0, 0, 0, 1)))
+ );
r = _mm_hadd_ps(r, r);
return _mm_cvtss_f32(_mm_hadd_ps(r, r));
#endif
}
-
-inline float f32_simd_l2(
- const float *first_vector,
- const float *second_vector,
- const size_t d
-) {
+inline float f32_simd_l2(const float* first_vector, const float* second_vector, const size_t d) {
#if defined(__APPLE__)
float distance = 0.0;
#pragma clang loop vectorize(enable)
@@ -173,17 +164,17 @@ inline float f32_simd_l2(
__m512 a_vec, b_vec;
size_t num_dimensions = d;
- simsimd_l2sq_f32_skylake_cycle:
- if (d < 16) {
- __mmask16 mask = (__mmask16)_bzhi_u32(0xFFFFFFFF, num_dimensions);
- a_vec = _mm512_maskz_loadu_ps(mask, first_vector);
- b_vec = _mm512_maskz_loadu_ps(mask, second_vector);
- num_dimensions = 0;
- } else {
- a_vec = _mm512_loadu_ps(first_vector);
- b_vec = _mm512_loadu_ps(second_vector);
- first_vector += 16, second_vector += 16, num_dimensions -= 16;
- }
+simsimd_l2sq_f32_skylake_cycle:
+ if (d < 16) {
+ __mmask16 mask = (__mmask16) _bzhi_u32(0xFFFFFFFF, num_dimensions);
+ a_vec = _mm512_maskz_loadu_ps(mask, first_vector);
+ b_vec = _mm512_maskz_loadu_ps(mask, second_vector);
+ num_dimensions = 0;
+ } else {
+ a_vec = _mm512_loadu_ps(first_vector);
+ b_vec = _mm512_loadu_ps(second_vector);
+ first_vector += 16, second_vector += 16, num_dimensions -= 16;
+ }
__m512 d_vec = _mm512_sub_ps(a_vec, b_vec);
d2_vec = _mm512_fmadd_ps(d_vec, d_vec, d2_vec);
if (num_dimensions)
@@ -191,16 +182,17 @@ inline float f32_simd_l2(
// _simsimd_reduce_f32x16_skylake
__m512 x = _mm512_add_ps(d2_vec, _mm512_shuffle_f32x4(d2_vec, d2_vec, _MM_SHUFFLE(0, 0, 3, 2)));
- __m128 r = _mm512_castps512_ps128(_mm512_add_ps(x, _mm512_shuffle_f32x4(x, x, _MM_SHUFFLE(0, 0, 0, 1))));
+ __m128 r =
+ _mm512_castps512_ps128(_mm512_add_ps(x, _mm512_shuffle_f32x4(x, x, _MM_SHUFFLE(0, 0, 0, 1)))
+ );
r = _mm_hadd_ps(r, r);
return _mm_cvtss_f32(_mm_hadd_ps(r, r));
#endif
}
-
inline uint32_t u8_simd_l2(
- const uint8_t *first_vector,
- const uint8_t *second_vector,
+ const uint8_t* first_vector,
+ const uint8_t* second_vector,
const size_t d
) {
#if defined(__ARM_NEON)
@@ -223,32 +215,34 @@ inline uint32_t u8_simd_l2(
__m512i a_u8_vec, b_u8_vec;
size_t num_dimensions = d;
- simsimd_l2sq_u8_ice_cycle:
- if (num_dimensions < 64) {
- const __mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, num_dimensions);
- a_u8_vec = _mm512_maskz_loadu_epi8(mask, first_vector);
- b_u8_vec = _mm512_maskz_loadu_epi8(mask, second_vector);
- num_dimensions = 0;
- }
- else {
- a_u8_vec = _mm512_loadu_si512(first_vector);
- b_u8_vec = _mm512_loadu_si512(second_vector);
- first_vector += 64, second_vector += 64, num_dimensions -= 64;
- }
+simsimd_l2sq_u8_ice_cycle:
+ if (num_dimensions < 64) {
+ const __mmask64 mask = (__mmask64) _bzhi_u64(0xFFFFFFFFFFFFFFFF, num_dimensions);
+ a_u8_vec = _mm512_maskz_loadu_epi8(mask, first_vector);
+ b_u8_vec = _mm512_maskz_loadu_epi8(mask, second_vector);
+ num_dimensions = 0;
+ } else {
+ a_u8_vec = _mm512_loadu_si512(first_vector);
+ b_u8_vec = _mm512_loadu_si512(second_vector);
+ first_vector += 64, second_vector += 64, num_dimensions -= 64;
+ }
// Substracting unsigned vectors in AVX-512 is done by saturating subtraction:
- __m512i d_u8_vec = _mm512_or_si512(_mm512_subs_epu8(a_u8_vec, b_u8_vec), _mm512_subs_epu8(b_u8_vec, a_u8_vec));
+ __m512i d_u8_vec =
+ _mm512_or_si512(_mm512_subs_epu8(a_u8_vec, b_u8_vec), _mm512_subs_epu8(b_u8_vec, a_u8_vec));
- // Multiply and accumulate at `int8` level which are actually uint7, accumulate at `int32` level:
+ // Multiply and accumulate at `int8` level which are actually uint7, accumulate at `int32`
+ // level:
d2_i32_vec = _mm512_dpbusds_epi32(d2_i32_vec, d_u8_vec, d_u8_vec);
- if (num_dimensions) goto simsimd_l2sq_u8_ice_cycle;
+ if (num_dimensions)
+ goto simsimd_l2sq_u8_ice_cycle;
return _mm512_reduce_add_epi32(d2_i32_vec);
#endif
};
inline uint32_t u8_simd_ip(
- const uint8_t *first_vector,
- const uint8_t *second_vector,
+ const uint8_t* first_vector,
+ const uint8_t* second_vector,
const size_t d
) {
#if defined(__ARM_NEON)
@@ -269,22 +263,23 @@ inline uint32_t u8_simd_ip(
__m512i a_u8_vec, b_u8_vec;
size_t num_dimensions = d;
- simsimd_l2sq_u8_ice_cycle:
- if (num_dimensions < 64) {
- const __mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, num_dimensions);
- a_u8_vec = _mm512_maskz_loadu_epi8(mask, first_vector);
- b_u8_vec = _mm512_maskz_loadu_epi8(mask, second_vector);
- num_dimensions = 0;
- }
- else {
- a_u8_vec = _mm512_loadu_si512(first_vector);
- b_u8_vec = _mm512_loadu_si512(second_vector);
- first_vector += 64, second_vector += 64, num_dimensions -= 64;
- }
+simsimd_l2sq_u8_ice_cycle:
+ if (num_dimensions < 64) {
+ const __mmask64 mask = (__mmask64) _bzhi_u64(0xFFFFFFFFFFFFFFFF, num_dimensions);
+ a_u8_vec = _mm512_maskz_loadu_epi8(mask, first_vector);
+ b_u8_vec = _mm512_maskz_loadu_epi8(mask, second_vector);
+ num_dimensions = 0;
+ } else {
+ a_u8_vec = _mm512_loadu_si512(first_vector);
+ b_u8_vec = _mm512_loadu_si512(second_vector);
+ first_vector += 64, second_vector += 64, num_dimensions -= 64;
+ }
- // Multiply and accumulate at `int8` level which are actually uint7, accumulate at `int32` level:
+ // Multiply and accumulate at `int8` level which are actually uint7, accumulate at `int32`
+ // level:
d2_i32_vec = _mm512_dpbusds_epi32(d2_i32_vec, a_u8_vec, b_u8_vec);
- if (num_dimensions) goto simsimd_l2sq_u8_ice_cycle;
+ if (num_dimensions)
+ goto simsimd_l2sq_u8_ice_cycle;
return _mm512_reduce_add_epi32(d2_i32_vec);
#endif
};
@@ -293,59 +288,45 @@ inline uint32_t u8_simd_ip(
// PDX
////////////////
-
-inline void f32_pdx_ip(
- const float *first_vector,
- const float *second_vector,
- const size_t d
-) {
+inline void f32_pdx_ip(const float* first_vector, const float* second_vector, const size_t d) {
memset((void*) distances_f32, 0.0, F32_PDX_VECTOR_SIZE * sizeof(float));
for (size_t dim_idx = 0; dim_idx < d; dim_idx++) {
const size_t dimension_idx = dim_idx;
const size_t offset_to_dimension_start = dimension_idx * F32_PDX_VECTOR_SIZE;
for (size_t vector_idx = 0; vector_idx < F32_PDX_VECTOR_SIZE; ++vector_idx) {
- distances_f32[vector_idx] += second_vector[dimension_idx] * first_vector[offset_to_dimension_start + vector_idx];
+ distances_f32[vector_idx] +=
+ second_vector[dimension_idx] * first_vector[offset_to_dimension_start + vector_idx];
}
}
}
-inline void f32_pdx_l1(
- const float *first_vector,
- const float *second_vector,
- const size_t d
-) {
+inline void f32_pdx_l1(const float* first_vector, const float* second_vector, const size_t d) {
memset((void*) distances_f32, 0.0, F32_PDX_VECTOR_SIZE * sizeof(float));
for (size_t dim_idx = 0; dim_idx < d; dim_idx++) {
const size_t dimension_idx = dim_idx;
const size_t offset_to_dimension_start = dimension_idx * F32_PDX_VECTOR_SIZE;
for (size_t vector_idx = 0; vector_idx < F32_PDX_VECTOR_SIZE; ++vector_idx) {
- float to_abs = second_vector[dimension_idx] - first_vector[offset_to_dimension_start + vector_idx];
+ float to_abs =
+ second_vector[dimension_idx] - first_vector[offset_to_dimension_start + vector_idx];
distances_f32[vector_idx] += std::fabs(to_abs);
}
}
}
-inline void f32_pdx_l2(
- const float *first_vector,
- const float *second_vector,
- const size_t d
-) {
+inline void f32_pdx_l2(const float* first_vector, const float* second_vector, const size_t d) {
memset((void*) distances_f32, 0.0, F32_PDX_VECTOR_SIZE * sizeof(float));
for (size_t dim_idx = 0; dim_idx < d; dim_idx++) {
const size_t dimension_idx = dim_idx;
const size_t offset_to_dimension_start = dimension_idx * F32_PDX_VECTOR_SIZE;
for (size_t vector_idx = 0; vector_idx < F32_PDX_VECTOR_SIZE; ++vector_idx) {
- float to_multiply = second_vector[dimension_idx] - first_vector[offset_to_dimension_start + vector_idx];
+ float to_multiply =
+ second_vector[dimension_idx] - first_vector[offset_to_dimension_start + vector_idx];
distances_f32[vector_idx] += to_multiply * to_multiply;
}
}
}
-inline void u8_pdx_l2(
- const uint8_t *first_vector,
- const uint8_t *second_vector,
- const size_t d
-) {
+inline void u8_pdx_l2(const uint8_t* first_vector, const uint8_t* second_vector, const size_t d) {
memset((void*) distances_u8, 0, U8_PDX_VECTOR_SIZE * sizeof(uint32_t));
#if defined(__ARM_NEON)
uint32x4_t res[U8_N_REGISTERS_NEON];
@@ -355,12 +336,13 @@ inline void u8_pdx_l2(
res[i] = vdupq_n_u32(0);
}
// Compute L2
- for (size_t dim_idx = 0; dim_idx < d; dim_idx+=4) {
+ for (size_t dim_idx = 0; dim_idx < d; dim_idx += 4) {
const uint32_t dimension_idx = dim_idx;
const uint8x8_t vals = vld1_u8(&second_vector[dimension_idx]);
const uint8x16_t vec1_u8 = vqtbl1q_u8(vcombine_u8(vals, vals), idx);
const size_t offset_to_dimension_start = dimension_idx * U8_PDX_VECTOR_SIZE;
- for (int i = 0; i < U8_N_REGISTERS_NEON; ++i) { // total: 64 vectors * 4 dimensions each (at 1 byte per value = 2048-bits)
+ for (int i = 0; i < U8_N_REGISTERS_NEON;
+ ++i) { // total: 64 vectors * 4 dimensions each (at 1 byte per value = 2048-bits)
// Read 16 bytes of data (16 values) with 4 dimensions of 4 vectors
const uint8x16_t vec2_u8 = vld1q_u8(&first_vector[offset_to_dimension_start + i * 16]);
const uint8x16_t diff_u8 = vabdq_u8(vec1_u8, vec2_u8);
@@ -373,23 +355,28 @@ inline void u8_pdx_l2(
}
#elif defined(__AVX512F__)
__m512i res[U8_N_REGISTERS_AVX];
- const uint32_t * query_grouped = (uint32_t *)second_vector;
+ const uint32_t* query_grouped = (uint32_t*) second_vector;
// Load 64 initial values
for (size_t i = 0; i < U8_N_REGISTERS_AVX; ++i) {
res[i] = _mm512_load_si512(&distances_u8[i * 16]);
}
// Compute L2
- for (size_t dim_idx = 0; dim_idx < d; dim_idx+=4) {
+ for (size_t dim_idx = 0; dim_idx < d; dim_idx += 4) {
const uint32_t dimension_idx = dim_idx;
// To load the query efficiently I will load it as uint32_t (4 bytes packed in 1 word)
const uint32_t query_value = query_grouped[dimension_idx / 4];
// And then broadcast it to the register
const __m512i vec1_u8 = _mm512_set1_epi32(query_value);
const size_t offset_to_dimension_start = dimension_idx * U8_PDX_VECTOR_SIZE;
- for (int i = 0; i < U8_N_REGISTERS_AVX; ++i) { // total: 64 vectors (4 iterations of 16 vectors) * 4 dimensions each (at 1 byte per value = 2048-bits)
+ for (int i = 0; i < U8_N_REGISTERS_AVX;
+ ++i) { // total: 64 vectors (4 iterations of 16 vectors) * 4 dimensions each (at 1 byte
+ // per value = 2048-bits)
// Read 64 bytes of data (64 values) with 4 dimensions of 16 vectors
- const __m512i vec2_u8 = _mm512_loadu_si512(&first_vector[offset_to_dimension_start + i * 64]);
- const __m512i diff_u8 = _mm512_or_si512(_mm512_subs_epu8(vec1_u8, vec2_u8), _mm512_subs_epu8(vec2_u8, vec1_u8));
+ const __m512i vec2_u8 =
+ _mm512_loadu_si512(&first_vector[offset_to_dimension_start + i * 64]);
+ const __m512i diff_u8 = _mm512_or_si512(
+ _mm512_subs_epu8(vec1_u8, vec2_u8), _mm512_subs_epu8(vec2_u8, vec1_u8)
+ );
// I can use this asymmetric dot product as my values are actually 7-bit
// Hence, the [sign] properties of the second operand is ignored
// As results will never be negative, it can be stored on res[i] without issues
@@ -403,11 +390,7 @@ inline void u8_pdx_l2(
#endif
};
-inline void u8_pdx_ip(
- const uint8_t *first_vector,
- const uint8_t *second_vector,
- const size_t d
-) {
+inline void u8_pdx_ip(const uint8_t* first_vector, const uint8_t* second_vector, const size_t d) {
memset((void*) distances_u8, 0, U8_PDX_VECTOR_SIZE * sizeof(uint32_t));
#if defined(__ARM_NEON)
uint32x4_t res[U8_N_REGISTERS_NEON];
@@ -417,12 +400,13 @@ inline void u8_pdx_ip(
res[i] = vdupq_n_u32(0);
}
// Compute L2
- for (size_t dim_idx = 0; dim_idx < d; dim_idx+=4) {
+ for (size_t dim_idx = 0; dim_idx < d; dim_idx += 4) {
const uint32_t dimension_idx = dim_idx;
const uint8x8_t vals = vld1_u8(&second_vector[dimension_idx]);
const uint8x16_t vec1_u8 = vqtbl1q_u8(vcombine_u8(vals, vals), idx);
const size_t offset_to_dimension_start = dimension_idx * U8_PDX_VECTOR_SIZE;
- for (int i = 0; i < 16; ++i) { // total: 64 vectors * 4 dimensions each (at 1 byte per value = 2048-bits)
+ for (int i = 0; i < 16;
+ ++i) { // total: 64 vectors * 4 dimensions each (at 1 byte per value = 2048-bits)
// Read 16 bytes of data (16 values) with 4 dimensions of 4 vectors
const uint8x16_t vec2_u8 = vld1q_u8(&first_vector[offset_to_dimension_start + i * 16]);
res[i] = vdotq_u32(res[i], vec2_u8, vec1_u8);
@@ -434,22 +418,25 @@ inline void u8_pdx_ip(
}
#elif defined(__AVX512F__)
__m512i res[U8_N_REGISTERS_AVX];
- const uint32_t * query_grouped = (uint32_t *)second_vector;
+ const uint32_t* query_grouped = (uint32_t*) second_vector;
// Load 64 initial values
for (size_t i = 0; i < U8_N_REGISTERS_AVX; ++i) {
res[i] = _mm512_load_si512(&distances_u8[i * 16]);
}
// Compute L2
- for (size_t dim_idx = 0; dim_idx < d; dim_idx+=4) {
+ for (size_t dim_idx = 0; dim_idx < d; dim_idx += 4) {
const uint32_t dimension_idx = dim_idx;
// To load the query efficiently I will load it as uint32_t (4 bytes packed in 1 word)
const uint32_t query_value = query_grouped[dimension_idx / 4];
// And then broadcast it to the register
const __m512i vec1_u8 = _mm512_set1_epi32(query_value);
const size_t offset_to_dimension_start = dimension_idx * U8_PDX_VECTOR_SIZE;
- for (int i = 0; i < U8_N_REGISTERS_AVX; ++i) { // total: 64 vectors (4 iterations of 16 vectors) * 4 dimensions each (at 1 byte per value = 2048-bits)
+ for (int i = 0; i < U8_N_REGISTERS_AVX;
+ ++i) { // total: 64 vectors (4 iterations of 16 vectors) * 4 dimensions each (at 1 byte
+ // per value = 2048-bits)
// Read 64 bytes of data (64 values) with 4 dimensions of 16 vectors
- const __m512i vec2_u8 = _mm512_loadu_si512(&first_vector[offset_to_dimension_start + i * 64]);
+ const __m512i vec2_u8 =
+ _mm512_loadu_si512(&first_vector[offset_to_dimension_start + i * 64]);
// I can use this asymmetric dot product as my values are actually 7-bit
// Hence, the [sign] properties of the second operand is ignored
// As results will never be negative, it can be stored on res[i] without issues
@@ -463,15 +450,15 @@ inline void u8_pdx_ip(
#endif
};
-template
+template
std::vector> standalone_simd(
- const T *first_vector,
- const T *second_vector,
+ const T* first_vector,
+ const T* second_vector,
const size_t d,
const size_t num_queries,
const size_t num_vectors,
const size_t knn,
- const size_t * positions = nullptr
+ const size_t* positions = nullptr
) {
std::vector> result(knn * num_queries);
std::vector> all_distances(num_vectors);
@@ -484,7 +471,7 @@ std::vector> standalone_simd(
data = data + (positions[j] * d);
}
DistanceType_t current_distance;
- if constexpr (kernel == F32_SIMD_IP){
+ if constexpr (kernel == F32_SIMD_IP) {
current_distance = f32_simd_ip(data, query, d);
} else if constexpr (kernel == F32_SIMD_L2) {
current_distance = f32_simd_l2(data, query, d);
@@ -501,10 +488,7 @@ std::vector> standalone_simd(
}
// Partial sort to get top-k
- if constexpr (
- kernel == F32_SIMD_IP
- || kernel == U8_SIMD_IP
- ) {
+ if constexpr (kernel == F32_SIMD_IP || kernel == U8_SIMD_IP) {
std::partial_sort(
all_distances.begin(),
all_distances.begin() + knn,
@@ -528,10 +512,10 @@ std::vector> standalone_simd(
return result;
}
-template
+template
std::vector> standalone_pdx(
- const T *first_vector,
- const T *second_vector,
+ const T* first_vector,
+ const T* second_vector,
const size_t d,
const size_t num_queries,
const size_t num_vectors,
@@ -544,8 +528,8 @@ std::vector> standalone_pdx(
const T* data = first_vector;
// Fill all_distances by direct indexing
size_t global_offset = 0;
- for (size_t j = 0; j < num_vectors; j+=PDX_BLOCK_SIZE) {
- if constexpr (kernel == F32_PDX_IP){
+ for (size_t j = 0; j < num_vectors; j += PDX_BLOCK_SIZE) {
+ if constexpr (kernel == F32_PDX_IP) {
f32_pdx_ip(data, query, d);
} else if constexpr (kernel == F32_PDX_L2) {
f32_pdx_l2(data, query, d);
@@ -557,7 +541,7 @@ std::vector> standalone_pdx(
// TODO: Ugly (could be a bottleneck on PDX kernels)
for (uint32_t z = 0; z < PDX_BLOCK_SIZE; ++z) {
all_distances[global_offset].index = global_offset;
- if constexpr (std::is_same_v){
+ if constexpr (std::is_same_v) {
all_distances[global_offset].distance = distances_f32[z];
} else if constexpr (std::is_same_v) {
all_distances[global_offset].distance = distances_u8[z];
@@ -568,10 +552,7 @@ std::vector> standalone_pdx(
}
// Partial sort to get top-k
- if constexpr (
- kernel == F32_PDX_IP
- || kernel == U8_PDX_IP
- ) {
+ if constexpr (kernel == F32_PDX_IP || kernel == U8_PDX_IP) {
std::partial_sort(
all_distances.begin(),
all_distances.begin() + knn,
@@ -598,68 +579,92 @@ std::vector> standalone_pdx(
std::vector> standalone_f32(
const VectorSearchKernel kernel,
- const float *first_vector,
- const float *second_vector,
+ const float* first_vector,
+ const float* second_vector,
const size_t d,
const size_t num_queries,
const size_t num_vectors,
const size_t knn
) {
switch (kernel) {
- case F32_PDX_IP:
- return standalone_pdx(first_vector, second_vector, d, num_queries, num_vectors, knn);
- case F32_PDX_L2:
- return standalone_pdx(first_vector, second_vector, d, num_queries, num_vectors, knn);
-
- case F32_SIMD_IP:
- return standalone_simd(first_vector, second_vector, d, num_queries, num_vectors, knn);
- case F32_SIMD_L2:
- return standalone_simd(first_vector, second_vector, d, num_queries, num_vectors, knn);
-
- default:
- return standalone_pdx(first_vector, second_vector, d, num_queries, num_vectors, knn);
+ case F32_PDX_IP:
+ return standalone_pdx(
+ first_vector, second_vector, d, num_queries, num_vectors, knn
+ );
+ case F32_PDX_L2:
+ return standalone_pdx(
+ first_vector, second_vector, d, num_queries, num_vectors, knn
+ );
+
+ case F32_SIMD_IP:
+ return standalone_simd(
+ first_vector, second_vector, d, num_queries, num_vectors, knn
+ );
+ case F32_SIMD_L2:
+ return standalone_simd(
+ first_vector, second_vector, d, num_queries, num_vectors, knn
+ );
+
+ default:
+ return standalone_pdx(
+ first_vector, second_vector, d, num_queries, num_vectors, knn
+ );
}
}
std::vector> filtered_standalone_u8(
const VectorSearchKernel kernel,
- const uint8_t *first_vector,
- const uint8_t *second_vector,
+ const uint8_t* first_vector,
+ const uint8_t* second_vector,
const size_t d,
const size_t num_queries,
const size_t num_vectors,
const size_t knn,
- const size_t *positions
+ const size_t* positions
) {
switch (kernel) {
- case U8_SIMD_L2:
- return standalone_simd(first_vector, second_vector, d, num_queries, num_vectors, knn, positions);
- default:
- return standalone_simd(first_vector, second_vector, d, num_queries, num_vectors, knn, positions);
+ case U8_SIMD_L2:
+ return standalone_simd(
+ first_vector, second_vector, d, num_queries, num_vectors, knn, positions
+ );
+ default:
+ return standalone_simd(
+ first_vector, second_vector, d, num_queries, num_vectors, knn, positions
+ );
}
}
std::vector> standalone_u8(
const VectorSearchKernel kernel,
- const uint8_t *first_vector,
- const uint8_t *second_vector,
+ const uint8_t* first_vector,
+ const uint8_t* second_vector,
const size_t d,
const size_t num_queries,
const size_t num_vectors,
const size_t knn
) {
switch (kernel) {
- case U8_PDX_L2:
- return standalone_pdx(first_vector, second_vector, d, num_queries, num_vectors, knn);
- case U8_PDX_IP:
- return standalone_pdx(first_vector, second_vector, d, num_queries, num_vectors, knn);
-
- case U8_SIMD_L2:
- return standalone_simd(first_vector, second_vector, d, num_queries, num_vectors, knn);
- case U8_SIMD_IP:
- return standalone_simd(first_vector, second_vector, d, num_queries, num_vectors, knn);
-
- default:
- return standalone_pdx(first_vector, second_vector, d, num_queries, num_vectors, knn);
+ case U8_PDX_L2:
+ return standalone_pdx(
+ first_vector, second_vector, d, num_queries, num_vectors, knn
+ );
+ case U8_PDX_IP:
+ return standalone_pdx(
+ first_vector, second_vector, d, num_queries, num_vectors, knn
+ );
+
+ case U8_SIMD_L2:
+ return standalone_simd(
+ first_vector, second_vector, d, num_queries, num_vectors, knn
+ );
+ case U8_SIMD_IP:
+ return standalone_simd(
+ first_vector, second_vector, d, num_queries, num_vectors, knn
+ );
+
+ default:
+ return standalone_pdx(
+ first_vector, second_vector, d, num_queries, num_vectors, knn
+ );
}
}
diff --git a/benchmarks/bench_kernels/kernels.py b/benchmarks/kernels_playground/kernels.py
similarity index 100%
rename from benchmarks/bench_kernels/kernels.py
rename to benchmarks/kernels_playground/kernels.py
diff --git a/benchmarks/bench_kernels/requirements.txt b/benchmarks/kernels_playground/requirements.txt
similarity index 100%
rename from benchmarks/bench_kernels/requirements.txt
rename to benchmarks/kernels_playground/requirements.txt
diff --git a/benchmarks/pdx_end_to_end.cpp b/benchmarks/pdx_end_to_end.cpp
new file mode 100644
index 0000000..927213f
--- /dev/null
+++ b/benchmarks/pdx_end_to_end.cpp
@@ -0,0 +1,210 @@
+#ifndef BENCHMARK_TIME
+#define BENCHMARK_TIME = true
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "benchmark_utils.hpp"
+#include "pdx/index.hpp"
+#include "pdx/utils.hpp"
+
+template
+void RunBenchmark(
+ const RawDatasetInfo& info,
+ const std::string& dataset,
+ const std::string& algorithm,
+ const float* data,
+ const float* queries,
+ const std::vector& nprobes_to_use
+) {
+ const size_t d = info.num_dimensions;
+ const size_t n = info.num_embeddings;
+ const size_t n_queries = info.num_queries;
+ uint8_t KNN = BenchmarkUtils::KNN;
+ size_t NUM_MEASURE_RUNS = BenchmarkUtils::NUM_MEASURE_RUNS;
+ std::string RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "END_TO_END_PDX_ADSAMPLING.csv";
+
+ PDX::PDXIndexConfig index_config{
+ .num_dimensions = static_cast(d),
+ .distance_metric = info.distance_metric,
+ .seed = 42,
+ .normalize = true,
+ .sampling_fraction = 1.0f
+ };
+
+ std::cout << "Building index (num_clusters=auto)...\n";
+ auto build_start = std::chrono::high_resolution_clock::now();
+ IndexT pdx_index(index_config);
+ pdx_index.BuildIndex(data, n);
+ auto build_end = std::chrono::high_resolution_clock::now();
+ double build_ms = std::chrono::duration(build_end - build_start).count();
+ std::cout << "Build time: " << build_ms << " ms\n";
+ std::cout << "Clusters: " << pdx_index.GetNumClusters() << "\n";
+ std::cout << "Index in-memory size: " << std::fixed << std::setprecision(2)
+ << static_cast(pdx_index.GetInMemorySizeInBytes()) / (1024.0 * 1024.0)
+ << " MB\n";
+
+ // Load ground truth
+ bool use_skmeans_gt = false;
+ std::unordered_map> gt_map;
+ std::unique_ptr gt_buffer;
+ uint32_t* int_ground_truth = nullptr;
+
+ if (use_skmeans_gt) {
+ std::string gt_path = GROUND_TRUTH_JSON_DIR + "/" + dataset + ".json";
+ gt_map = ParseGroundTruthJson(gt_path);
+ if (gt_map.empty()) {
+ std::cerr << "No ground truth found at " << gt_path << "\n";
+ return;
+ }
+ std::cout << "Ground truth loaded (json): " << gt_map.size() << " queries\n";
+ } else {
+ std::string gt_path =
+ BenchmarkUtils::GROUND_TRUTH_DATA + info.pdx_dataset_name + "_100_norm";
+ gt_buffer = MmapFile(gt_path);
+ int_ground_truth = reinterpret_cast(gt_buffer.get());
+ std::cout << "Ground truth loaded (pdx binary): " << gt_path << "\n";
+ }
+
+ for (size_t ivf_nprobe : nprobes_to_use) {
+ if (pdx_index.GetNumClusters() < ivf_nprobe)
+ continue;
+
+ pdx_index.SetNProbe(ivf_nprobe);
+
+ // Recall pass
+ float recalls = 0;
+ if (use_skmeans_gt) {
+ for (size_t l = 0; l < n_queries; ++l) {
+ auto result = pdx_index.Search(queries + l * d, KNN);
+ if (gt_map.count(static_cast(l))) {
+ recalls += ComputeRecallFromJson(result, gt_map.at(static_cast(l)), KNN);
+ }
+ }
+ } else {
+ for (size_t l = 0; l < n_queries; ++l) {
+ auto result = pdx_index.Search(queries + l * d, KNN);
+ BenchmarkUtils::VerifyResult(recalls, result, KNN, int_ground_truth, l);
+ }
+ }
+
+ std::vector runtimes;
+ runtimes.resize(NUM_MEASURE_RUNS * n_queries);
+ TicToc clock;
+ for (size_t j = 0; j < NUM_MEASURE_RUNS; ++j) {
+ for (size_t l = 0; l < n_queries; ++l) {
+ clock.Reset();
+ clock.Tic();
+ pdx_index.Search(queries + l * d, KNN);
+ clock.Toc();
+ runtimes[j + l * NUM_MEASURE_RUNS] = {clock.accum_time};
+ }
+ }
+
+ BenchmarkMetadata results_metadata = {
+ dataset,
+ algorithm,
+ NUM_MEASURE_RUNS,
+ n_queries,
+ ivf_nprobe,
+ KNN,
+ recalls,
+ };
+ BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata);
+ }
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 2) {
+ std::cerr << "Usage: " << argv[0] << " [index_type] [nprobe]\n";
+ std::cerr << "Index types: pdx_f32 (default), pdx_u8, pdx_tree_f32, pdx_tree_u8\n";
+ std::cerr << "Available datasets:";
+ for (const auto& [name, _] : RAW_DATASET_PARAMS) {
+ std::cerr << " " << name;
+ }
+ std::cerr << "\n";
+ return 1;
+ }
+ std::string dataset = argv[1];
+ std::string index_type = (argc > 2) ? argv[2] : "pdx_f32";
+ size_t arg_ivf_nprobe = (argc > 3) ? std::atoi(argv[3]) : 0;
+
+ auto it = RAW_DATASET_PARAMS.find(dataset);
+ if (it == RAW_DATASET_PARAMS.end()) {
+ std::cerr << "Unknown dataset: " << dataset << "\n";
+ return 1;
+ }
+ const auto& info = it->second;
+ const size_t n = info.num_embeddings;
+ const size_t d = info.num_dimensions;
+ const size_t n_queries = info.num_queries;
+
+ std::cout << "==> PDX End-to-End (Build + Search)\n";
+ std::cout << "Dataset: " << dataset << " (n=" << n << ", d=" << d << ")\n";
+ std::cout << "Index type: " << index_type << "\n";
+
+ // Read data
+ std::string data_path = RAW_DATA_DIR + "/data_" + dataset + ".bin";
+ std::string query_path = RAW_DATA_DIR + "/data_" + dataset + "_test.bin";
+
+ std::vector