Skip to content

Commit 1878951

Browse files
authored
feature: Per index logger and new logger unit test for each index (#87)
This PR adds a per-index logging feature.
1 parent a8221ad commit 1878951

File tree

14 files changed

+495
-48
lines changed

14 files changed

+495
-48
lines changed

include/svs/index/flat/flat.h

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#pragma once
1818

1919
// Flat index utilities
20+
#include "svs/core/logging.h"
2021
#include "svs/index/flat/inserters.h"
2122
#include "svs/index/index.h"
2223

@@ -145,6 +146,8 @@ class FlatIndex {
145146
data_storage_type data_;
146147
[[no_unique_address]] distance_type distance_;
147148
threads::ThreadPoolHandle threadpool_;
149+
// SVS logger used for per-index logging
150+
svs::logging::logger_ptr logger_;
148151

149152
// Constructs controlling the iteration strategy over the data and queries.
150153
search_parameters_type search_parameters_{};
@@ -171,6 +174,9 @@ class FlatIndex {
171174
}
172175

173176
public:
177+
/// @brief Getter method for logger
178+
svs::logging::logger_ptr get_logger() const { return logger_; }
179+
174180
search_parameters_type get_search_parameters() const { return search_parameters_; }
175181

176182
void set_search_parameters(const search_parameters_type& search_parameters) {
@@ -189,22 +195,35 @@ class FlatIndex {
189195
/// instance or an integer specifying the number of threads to use. In the latter
190196
/// case, a new default thread pool will be constructed using ``threadpool_proto``
191197
/// as the number of threads to create.
198+
/// @param logger spdlog logger for per-index logging customization (defaults to ``svs::logging::get()``).
192199
///
193200
/// @copydoc threadpool_requirements
194201
///
195202
template <typename ThreadPoolProto>
196-
FlatIndex(Data data, Dist distance, ThreadPoolProto threadpool_proto)
203+
FlatIndex(
204+
Data data,
205+
Dist distance,
206+
ThreadPoolProto threadpool_proto,
207+
svs::logging::logger_ptr logger = svs::logging::get()
208+
)
197209
requires std::is_same_v<Ownership, OwnsMembers>
198210
: data_{std::move(data)}
199211
, distance_{std::move(distance)}
200-
, threadpool_{threads::as_threadpool(std::move(threadpool_proto))} {}
212+
, threadpool_{threads::as_threadpool(std::move(threadpool_proto))}
213+
, logger_{std::move(logger)} {}
201214

202215
template <typename ThreadPoolProto>
203-
FlatIndex(Data& data, Dist distance, ThreadPoolProto threadpool_proto)
216+
FlatIndex(
217+
Data& data,
218+
Dist distance,
219+
ThreadPoolProto threadpool_proto,
220+
svs::logging::logger_ptr logger = svs::logging::get()
221+
)
204222
requires std::is_same_v<Ownership, ReferencesMembers>
205223
: data_{data}
206224
, distance_{std::move(distance)}
207-
, threadpool_{threads::as_threadpool(std::move(threadpool_proto))} {}
225+
, threadpool_{threads::as_threadpool(std::move(threadpool_proto))}
226+
, logger_{std::move(logger)} {}
208227

209228
////// Dataset Interface
210229

@@ -462,6 +481,7 @@ class FlatIndex {
462481
/// instance or an integer specifying the number of threads to use. In the latter case,
463482
/// a new default thread pool will be constructed using ``threadpool_proto`` as the
464483
/// number of threads to create.
484+
/// @param logger spdlog logger for per-index logging customization (defaults to ``svs::logging::get()``).
465485
///
466486
/// This method provides much of the heavy lifting for constructing a Flat index from
467487
/// a data file on disk or a dataset in memory.
@@ -472,11 +492,16 @@ class FlatIndex {
472492
///
473493
template <typename DataProto, typename Distance, typename ThreadPoolProto>
474494
auto auto_assemble(
475-
DataProto&& data_proto, Distance distance, ThreadPoolProto threadpool_proto
495+
DataProto&& data_proto,
496+
Distance distance,
497+
ThreadPoolProto threadpool_proto,
498+
svs::logging::logger_ptr logger = svs::logging::get()
476499
) {
477500
auto threadpool = threads::as_threadpool(std::move(threadpool_proto));
478501
auto data = svs::detail::dispatch_load(std::forward<DataProto>(data_proto), threadpool);
479-
return FlatIndex(std::move(data), std::move(distance), std::move(threadpool));
502+
return FlatIndex(
503+
std::move(data), std::move(distance), std::move(threadpool), std::move(logger)
504+
);
480505
}
481506

482507
/// @brief Alias for a short-lived flat index.

include/svs/index/inverted/clustering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -801,7 +801,8 @@ Clustering<I> cluster_with(
801801
const Data& data,
802802
std::span<const I> centroid_ids,
803803
const ClusteringParameters& params,
804-
Index& primary_index
804+
Index& primary_index,
805+
svs::logging::logger_ptr logger = svs::logging::get()
805806
) {
806807
for (auto id : centroid_ids) {
807808
if (id >= data.size()) {
@@ -820,7 +821,6 @@ Clustering<I> cluster_with(
820821
size_t start = 0;
821822
size_t datasize = data.size();
822823
auto timer = lib::Timer();
823-
auto logger = svs::logging::get();
824824

825825
while (start < datasize) {
826826
size_t stop = std::min(start + batchsize, datasize);

include/svs/index/inverted/memory_based.h

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -339,12 +339,17 @@ template <typename Index, typename Cluster> class InvertedIndex {
339339

340340
template <threads::ThreadPool Pool>
341341
InvertedIndex(
342-
Index index, Cluster cluster, translator_type index_local_to_global, Pool threadpool
342+
Index index,
343+
Cluster cluster,
344+
translator_type index_local_to_global,
345+
Pool threadpool,
346+
svs::logging::logger_ptr logger = svs::logging::get()
343347
)
344348
: index_{std::move(index)}
345349
, cluster_{std::move(cluster)}
346350
, index_local_to_global_{std::move(index_local_to_global)}
347-
, threadpool_{std::move(threadpool)} {
351+
, threadpool_{std::move(threadpool)}
352+
, logger_{std::move(logger)} {
348353
// Clear out the threadpool in the inner index - prefer to handle threading
349354
// ourselves.
350355
index_.set_threadpool(threads::SequentialThreadPool());
@@ -492,6 +497,10 @@ template <typename Index, typename Cluster> class InvertedIndex {
492497
index_.save(index_config, graph, data);
493498
}
494499

500+
///// Accessors
501+
/// @brief Getter method for logger
502+
svs::logging::logger_ptr get_logger() const { return logger_; }
503+
495504
private:
496505
// Tunable Parameters
497506
double refinement_epsilon_ = 10.0;
@@ -503,6 +512,9 @@ template <typename Index, typename Cluster> class InvertedIndex {
503512

504513
// Transient parameters.
505514
threads::ThreadPoolHandle threadpool_;
515+
516+
// SVS logger used for per-index logging
517+
svs::logging::logger_ptr logger_;
506518
};
507519

508520
struct PickRandomly {
@@ -548,7 +560,8 @@ auto auto_build(
548560
// Customizations
549561
Strategy strategy = {},
550562
CentroidPicker centroid_picker = {},
551-
ClusteringOp clustering_op = {}
563+
ClusteringOp clustering_op = {},
564+
svs::logging::logger_ptr logger = svs::logging::get()
552565
) {
553566
// Perform clustering.
554567
auto threadpool = threads::as_threadpool(std::move(threadpool_proto));
@@ -569,7 +582,11 @@ auto auto_build(
569582

570583
// Cluster the dataset with the help of the primary index.
571584
auto clustering = cluster_with(
572-
data, lib::as_const_span(centroids), parameters.clustering_parameters_, index
585+
data,
586+
lib::as_const_span(centroids),
587+
parameters.clustering_parameters_,
588+
index,
589+
logger
573590
);
574591

575592
// Perform any post-processing on the clustering.
@@ -585,7 +602,8 @@ auto auto_build(
585602
std::move(index),
586603
strategy(data, clustering, HugepageAllocator<std::byte>()),
587604
std::move(centroids),
588-
std::move(primary_threadpool)};
605+
std::move(primary_threadpool),
606+
std::move(logger)};
589607
}
590608

591609
///// Auto Assembling.
@@ -601,7 +619,8 @@ auto assemble_from_clustering(
601619
Strategy strategy,
602620
const std::filesystem::path& index_config,
603621
const std::filesystem::path& graph,
604-
ThreadPoolProto threadpool_proto
622+
ThreadPoolProto threadpool_proto,
623+
svs::logging::logger_ptr logger = svs::logging::get()
605624
) {
606625
auto threadpool = threads::as_threadpool(std::move(threadpool_proto));
607626
auto original = svs::detail::dispatch_load(std::move(data_proto), threadpool);
@@ -621,15 +640,17 @@ auto assemble_from_clustering(
621640
return local_data;
622641
}),
623642
distance,
624-
1
643+
1,
644+
logger
625645
);
626646

627647
// Create the clustering and return the final results.
628648
return InvertedIndex(
629649
std::move(index),
630650
strategy(original, clustering, HugepageAllocator<std::byte>()),
631651
std::move(ids),
632-
std::move(threadpool)
652+
std::move(threadpool),
653+
std::move(logger)
633654
);
634655
}
635656

include/svs/index/vamana/calibrate.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,9 @@ VamanaSearchParameters optimize_split_buffer(
176176
double target_recall,
177177
VamanaSearchParameters current,
178178
const F& compute_recall,
179-
const DoSearch& do_search
179+
const DoSearch& do_search,
180+
svs::logging::logger_ptr logger = svs::logging::get()
180181
) {
181-
auto logger = svs::logging::get();
182182
svs::logging::trace(logger, "Entering split buffer optimization routine");
183183
assert(
184184
current.buffer_config_.get_search_window_size() ==
@@ -252,11 +252,11 @@ std::pair<VamanaSearchParameters, bool> optimize_search_buffer(
252252
size_t num_neighbors,
253253
double target_recall,
254254
const ComputeRecall& compute_recall,
255-
const DoSearch& do_search
255+
const DoSearch& do_search,
256+
svs::logging::logger_ptr logger = svs::logging::get()
256257
) {
257258
using enum CalibrationParameters::SearchBufferOptimization;
258259
using dataset_type = typename Index::data_type;
259-
auto logger = svs::logging::get();
260260

261261
double max_recall = std::numeric_limits<double>::lowest();
262262
const size_t current_capacity = current.buffer_config_.get_total_capacity();
@@ -345,9 +345,9 @@ VamanaSearchParameters tune_prefetch(
345345
const CalibrationParameters& calibration_parameters,
346346
Index& index,
347347
VamanaSearchParameters search_parameters,
348-
const DoSearch& do_search
348+
const DoSearch& do_search,
349+
svs::logging::logger_ptr logger = svs::logging::get()
349350
) {
350-
auto logger = svs::logging::get();
351351
svs::logging::trace(logger, "Tuning prefetch parameters");
352352
const auto& prefetch_steps = calibration_parameters.prefetch_steps_;
353353
size_t max_lookahead = index.max_degree();

include/svs/index/vamana/dynamic_index.h

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,9 @@ class MutableVamanaIndex {
157157
float alpha_ = 1.2;
158158
bool use_full_search_history_ = true;
159159

160+
// SVS logger used for per-index logging
161+
svs::logging::logger_ptr logger_;
162+
160163
// Methods
161164
public:
162165
// Constructors
@@ -167,7 +170,9 @@ class MutableVamanaIndex {
167170
Idx entry_point,
168171
Dist distance_function,
169172
const ExternalIds& external_ids,
170-
ThreadPoolProto threadpool_proto
173+
ThreadPoolProto threadpool_proto,
174+
// Optional logger parameter
175+
svs::logging::logger_ptr logger = svs::logging::get()
171176
)
172177
: graph_{std::move(graph)}
173178
, data_{std::move(data)}
@@ -178,7 +183,9 @@ class MutableVamanaIndex {
178183
, distance_{std::move(distance_function)}
179184
, threadpool_{threads::as_threadpool(std::move(threadpool_proto))}
180185
, search_parameters_{vamana::construct_default_search_parameters(data_)}
181-
, construction_window_size_{2 * graph.max_degree()} {
186+
, construction_window_size_{2 * graph.max_degree()}
187+
// Store the logger supplied as a constructor argument
188+
, logger_{std::move(logger)} {
182189
translator_.insert(external_ids, threads::UnitRange<Idx>(0, external_ids.size()));
183190
}
184191

@@ -191,7 +198,8 @@ class MutableVamanaIndex {
191198
Data data,
192199
const ExternalIds& external_ids,
193200
Dist distance_function,
194-
ThreadPoolProto threadpool_proto
201+
ThreadPoolProto threadpool_proto,
202+
svs::logging::logger_ptr logger = svs::logging::get()
195203
)
196204
: graph_(Graph{data.size(), parameters.graph_max_degree})
197205
, data_(std::move(data))
@@ -206,7 +214,8 @@ class MutableVamanaIndex {
206214
, max_candidates_(parameters.max_candidate_pool_size)
207215
, prune_to_(parameters.prune_to)
208216
, alpha_(parameters.alpha)
209-
, use_full_search_history_{parameters.use_full_search_history} {
217+
, use_full_search_history_{parameters.use_full_search_history}
218+
, logger_{std::move(logger)} {
210219
// Setup the initial translation of external to internal ids.
211220
translator_.insert(external_ids, threads::UnitRange<Idx>(0, external_ids.size()));
212221

@@ -220,8 +229,8 @@ class MutableVamanaIndex {
220229
auto builder = VamanaBuilder(
221230
graph_, data_, distance_, parameters, threadpool_, prefetch_parameters
222231
);
223-
builder.construct(1.0f, entry_point_[0]);
224-
builder.construct(parameters.alpha, entry_point_[0]);
232+
builder.construct(1.0f, entry_point_[0], logging::Level::Info, logger_);
233+
builder.construct(parameters.alpha, entry_point_[0], logging::Level::Info, logger_);
225234
}
226235

227236
/// @brief Post re-load constructor.
@@ -240,7 +249,8 @@ class MutableVamanaIndex {
240249
graph_type graph,
241250
const Dist& distance_function,
242251
IDTranslator translator,
243-
Pool threadpool
252+
Pool threadpool,
253+
svs::logging::logger_ptr logger = svs::logging::get()
244254
)
245255
: graph_{std::move(graph)}
246256
, data_{std::move(data)}
@@ -255,7 +265,8 @@ class MutableVamanaIndex {
255265
, max_candidates_{config.build_parameters.max_candidate_pool_size}
256266
, prune_to_{config.build_parameters.prune_to}
257267
, alpha_{config.build_parameters.alpha}
258-
, use_full_search_history_{config.build_parameters.use_full_search_history} {}
268+
, use_full_search_history_{config.build_parameters.use_full_search_history}
269+
, logger_{std::move(logger)} {}
259270

260271
///// Scratchspace
261272
scratchspace_type scratchspace(const search_parameters_type& sp) const {
@@ -272,6 +283,8 @@ class MutableVamanaIndex {
272283
scratchspace_type scratchspace() const { return scratchspace(get_search_parameters()); }
273284

274285
///// Accessors
286+
/// @brief Getter method for logger
287+
svs::logging::logger_ptr get_logger() const { return logger_; }
275288

276289
/// @brief Get the alpha value used for pruning while mutating the graph.
277290
float get_alpha() const { return alpha_; }
@@ -1200,6 +1213,17 @@ template <typename Data, typename Dist, typename ExternalIds, threads::ThreadPoo
12001213
MutableVamanaIndex(const VamanaBuildParameters&, Data, const ExternalIds&, Dist, Pool)
12011214
-> MutableVamanaIndex<graphs::SimpleBlockedGraph<uint32_t>, Data, Dist>;
12021215

1216+
// Deduction guide for the constructor overload that also takes a logger
1217+
template <typename Data, typename Dist, typename ExternalIds, threads::ThreadPool Pool>
1218+
MutableVamanaIndex(
1219+
const VamanaBuildParameters&,
1220+
Data,
1221+
const ExternalIds&,
1222+
Dist,
1223+
Pool,
1224+
svs::logging::logger_ptr
1225+
) -> MutableVamanaIndex<graphs::SimpleBlockedGraph<uint32_t>, Data, Dist>;
1226+
12031227
namespace detail {
12041228

12051229
struct VamanaStateLoader {
@@ -1251,7 +1275,8 @@ auto auto_dynamic_assemble(
12511275
// to easily benchmark the static versus dynamic implementation.
12521276
//
12531277
// This is an internal API and should not be considered officially supported nor stable.
1254-
bool debug_load_from_static = false
1278+
bool debug_load_from_static = false,
1279+
svs::logging::logger_ptr logger = svs::logging::get()
12551280
) {
12561281
// Load the dataset
12571282
auto threadpool = threads::as_threadpool(std::move(threadpool_proto));
@@ -1317,7 +1342,8 @@ auto auto_dynamic_assemble(
13171342
std::move(graph),
13181343
std::move(distance),
13191344
std::move(translator),
1320-
std::move(threadpool)};
1345+
std::move(threadpool),
1346+
std::move(logger)};
13211347
}
13221348

13231349
} // namespace svs::index::vamana

0 commit comments

Comments
 (0)