|
16 | 16 |
|
17 | 17 | #pragma once |
18 | 18 |
|
| 19 | +/// |
| 20 | +/// @file preprocessor.h |
| 21 | +/// @brief Macros for explicit instantiation of distance implementations |
| 22 | +/// |
| 23 | +/// This file contains macros to systematically generate explicit template instantiations |
| 24 | +/// for distance implementations (L2Impl, IPImpl, CosineSimilarityImpl). |
| 25 | +/// |
| 26 | +/// ## Why Explicit Instantiation? |
| 27 | +/// |
| 28 | +/// The library supports runtime ISA dispatch - detecting AVX512/AVX2 support at runtime |
| 29 | +/// and calling the appropriate optimized implementation. This requires: |
| 30 | +/// 1. Separate compilation with architecture-specific compiler flags |
| 31 | +/// 2. Explicit instantiation of templates in those compilation units |
| 32 | +/// |
| 33 | +/// Without explicit instantiation, the templates would be instantiated inline wherever |
| 34 | +/// used, which would prevent proper ISA-specific optimization. |
| 35 | +/// |
| 36 | +/// ## Architecture |
| 37 | +/// |
| 38 | +/// Distance implementations (e.g., `L2Impl`) are thin wrappers that call `generic_simd_op` |
| 39 | +/// with a SIMD operation struct (e.g., `L2FloatOp<16>`). The SIMD ops contain the actual |
| 40 | +/// AVX intrinsics. By explicitly instantiating the distance implementations in files |
| 41 | +/// compiled with `-march=cascadelake` or `-march=haswell`, we ensure the AVX code is |
| 42 | +/// generated with appropriate optimizations. |
| 43 | +/// |
| 44 | +/// ## Type Combinations |
| 45 | +/// |
| 46 | +/// Each macro instantiates 16 type combinations (4 element types × 4 element types): |
| 47 | +/// - float, int8_t, uint8_t, Float16 |
| 48 | +/// |
| 49 | +/// This covers all supported mixed-type distance computations. |
| 50 | +/// |
| 51 | + |
| 52 | +/// Helper macro for L2 distance explicit instantiation |
| 53 | +/// @param SPEC Either `template` (for definitions) or `extern template` (for declarations) |
| 54 | +/// @param N Dimensionality (e.g., 64, 128, Dynamic) |
| 55 | +/// @param AVX AVX availability level (AVX_AVAILABILITY::AVX512 or AVX_AVAILABILITY::AVX2) |
19 | 56 | #define DISTANCE_L2_TEMPLATE_HELPER(SPEC, N, AVX) \ |
20 | 57 | SPEC struct L2Impl<N, float, float, AVX>; \ |
21 | 58 | SPEC struct L2Impl<N, float, int8_t, AVX>; \ |
|
34 | 71 | SPEC struct L2Impl<N, svs::float16::Float16, uint8_t, AVX>; \ |
35 | 72 | SPEC struct L2Impl<N, svs::float16::Float16, svs::float16::Float16, AVX>; |
36 | 73 |
|
/// Instantiate L2 distance implementations (use in .cpp files).
///
/// Expands to `DISTANCE_L2_TEMPLATE_HELPER(template, N, AVX)`, i.e. one explicit
/// instantiation definition of `L2Impl<N, ..., AVX>` per element-type pair listed by
/// the helper.
///
/// @param N Dimensionality (e.g., 64, 128, Dynamic)
/// @param AVX AVX availability level (AVX_AVAILABILITY::AVX512 or AVX_AVAILABILITY::AVX2)
///
/// No semicolon is appended here: the helper's final statement already ends with `;`,
/// so adding another would only emit an empty declaration (Clang: `-Wextra-semi`).
/// Invocations that are followed by their own `;` remain valid.
#define DISTANCE_L2_INSTANTIATE_TEMPLATE(N, AVX) \
    DISTANCE_L2_TEMPLATE_HELPER(template, N, AVX)
39 | 77 |
|
/// Declare external L2 distance implementations (use in .h files).
///
/// Expands to `DISTANCE_L2_TEMPLATE_HELPER(extern template, N, AVX)`, i.e. one explicit
/// instantiation declaration of `L2Impl<N, ..., AVX>` per element-type pair listed by
/// the helper.
///
/// @param N Dimensionality (e.g., 64, 128, Dynamic)
/// @param AVX AVX availability level (AVX_AVAILABILITY::AVX512 or AVX_AVAILABILITY::AVX2)
///
/// No semicolon is appended here: the helper's final statement already ends with `;`,
/// so adding another would only emit an empty declaration (Clang: `-Wextra-semi`).
/// Invocations that are followed by their own `;` remain valid.
#define DISTANCE_L2_EXTERN_TEMPLATE(N, AVX) \
    DISTANCE_L2_TEMPLATE_HELPER(extern template, N, AVX)
42 | 81 |
|
| 82 | +/// Helper macro for Inner Product explicit instantiation |
| 83 | +/// @param SPEC Either `template` (for definitions) or `extern template` (for declarations) |
| 84 | +/// @param N Dimensionality (e.g., 64, 128, Dynamic) |
| 85 | +/// @param AVX AVX availability level (AVX_AVAILABILITY::AVX512 or AVX_AVAILABILITY::AVX2) |
43 | 86 | #define DISTANCE_IP_TEMPLATE_HELPER(SPEC, N, AVX) \ |
44 | 87 | SPEC struct IPImpl<N, float, float, AVX>; \ |
45 | 88 | SPEC struct IPImpl<N, float, int8_t, AVX>; \ |
|
58 | 101 | SPEC struct IPImpl<N, svs::float16::Float16, uint8_t, AVX>; \ |
59 | 102 | SPEC struct IPImpl<N, svs::float16::Float16, svs::float16::Float16, AVX>; |
60 | 103 |
|
/// Instantiate Inner Product implementations (use in .cpp files).
///
/// Expands to `DISTANCE_IP_TEMPLATE_HELPER(template, N, AVX)`, i.e. one explicit
/// instantiation definition of `IPImpl<N, ..., AVX>` per element-type pair listed by
/// the helper.
///
/// @param N Dimensionality (e.g., 64, 128, Dynamic)
/// @param AVX AVX availability level (AVX_AVAILABILITY::AVX512 or AVX_AVAILABILITY::AVX2)
///
/// No semicolon is appended here: the helper's final statement already ends with `;`,
/// so adding another would only emit an empty declaration (Clang: `-Wextra-semi`).
/// Invocations that are followed by their own `;` remain valid.
#define DISTANCE_IP_INSTANTIATE_TEMPLATE(N, AVX) \
    DISTANCE_IP_TEMPLATE_HELPER(template, N, AVX)
63 | 107 |
|
/// Declare external Inner Product implementations (use in .h files).
///
/// Expands to `DISTANCE_IP_TEMPLATE_HELPER(extern template, N, AVX)`, i.e. one explicit
/// instantiation declaration of `IPImpl<N, ..., AVX>` per element-type pair listed by
/// the helper.
///
/// @param N Dimensionality (e.g., 64, 128, Dynamic)
/// @param AVX AVX availability level (AVX_AVAILABILITY::AVX512 or AVX_AVAILABILITY::AVX2)
///
/// No semicolon is appended here: the helper's final statement already ends with `;`,
/// so adding another would only emit an empty declaration (Clang: `-Wextra-semi`).
/// Invocations that are followed by their own `;` remain valid.
#define DISTANCE_IP_EXTERN_TEMPLATE(N, AVX) \
    DISTANCE_IP_TEMPLATE_HELPER(extern template, N, AVX)
66 | 111 |
|
| 112 | +/// Helper macro for Cosine Similarity explicit instantiation |
| 113 | +/// @param SPEC Either `template` (for definitions) or `extern template` (for declarations) |
| 114 | +/// @param N Dimensionality (e.g., 64, 128, Dynamic) |
| 115 | +/// @param AVX AVX availability level (AVX_AVAILABILITY::AVX512 or AVX_AVAILABILITY::AVX2) |
67 | 116 | #define DISTANCE_CS_TEMPLATE_HELPER(SPEC, N, AVX) \ |
68 | 117 | SPEC struct CosineSimilarityImpl<N, float, float, AVX>; \ |
69 | 118 | SPEC struct CosineSimilarityImpl<N, float, int8_t, AVX>; \ |
|
82 | 131 | SPEC struct CosineSimilarityImpl<N, svs::float16::Float16, uint8_t, AVX>; \ |
83 | 132 | SPEC struct CosineSimilarityImpl<N, svs::float16::Float16, svs::float16::Float16, AVX>; |
84 | 133 |
|
/// Instantiate Cosine Similarity implementations (use in .cpp files).
///
/// Expands to `DISTANCE_CS_TEMPLATE_HELPER(template, N, AVX)`, i.e. one explicit
/// instantiation definition of `CosineSimilarityImpl<N, ..., AVX>` per element-type
/// pair listed by the helper.
///
/// @param N Dimensionality (e.g., 64, 128, Dynamic)
/// @param AVX AVX availability level (AVX_AVAILABILITY::AVX512 or AVX_AVAILABILITY::AVX2)
///
/// No semicolon is appended here: the helper's final statement already ends with `;`,
/// so adding another would only emit an empty declaration (Clang: `-Wextra-semi`).
/// Invocations that are followed by their own `;` remain valid.
#define DISTANCE_CS_INSTANTIATE_TEMPLATE(N, AVX) \
    DISTANCE_CS_TEMPLATE_HELPER(template, N, AVX)
87 | 137 |
|
/// Declare external Cosine Similarity implementations (use in .h files).
///
/// Expands to `DISTANCE_CS_TEMPLATE_HELPER(extern template, N, AVX)`, i.e. one explicit
/// instantiation declaration of `CosineSimilarityImpl<N, ..., AVX>` per element-type
/// pair listed by the helper.
///
/// @param N Dimensionality (e.g., 64, 128, Dynamic)
/// @param AVX AVX availability level (AVX_AVAILABILITY::AVX512 or AVX_AVAILABILITY::AVX2)
///
/// No semicolon is appended here: the helper's final statement already ends with `;`,
/// so adding another would only emit an empty declaration (Clang: `-Wextra-semi`).
/// Invocations that are followed by their own `;` remain valid.
#define DISTANCE_CS_EXTERN_TEMPLATE(N, AVX) \
    DISTANCE_CS_TEMPLATE_HELPER(extern template, N, AVX)
0 commit comments