From 164ee9b361db131711d76b6a220df284ae52d5b7 Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Mon, 7 Oct 2024 12:12:37 +0400 Subject: [PATCH 1/3] replace linkedlist for takenbuffer with fixed array --- src/hnsw/external_index.c | 33 +++++++++++---------- src/hnsw/external_index.h | 20 ++++--------- src/hnsw/insert.c | 2 +- src/hnsw/retriever.c | 60 ++++++++++++++------------------------- src/hnsw/retriever.h | 2 +- src/hnsw/scan.c | 5 ++-- 6 files changed, 49 insertions(+), 73 deletions(-) diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c index 904791a3e..31179d616 100644 --- a/src/hnsw/external_index.c +++ b/src/hnsw/external_index.c @@ -626,7 +626,9 @@ void *ldb_wal_index_node_retriever(void *ctxp, unsigned long long id) page = extra_dirtied_get(ctx->extra_dirted, data_block_no, NULL); if(page == NULL) { buf = ReadBufferExtended(ctx->index_rel, MAIN_FORKNUM, data_block_no, RBM_NORMAL, NULL); +#if LANTERNDB_COPYNODES LockBuffer(buf, BUFFER_LOCK_SHARE); +#endif page = BufferGetPage(buf); } else { idx_page_prelocked = true; @@ -634,29 +636,31 @@ void *ldb_wal_index_node_retriever(void *ctxp, unsigned long long id) nodepage = (HnswIndexTuple *)PageGetItem(page, PageGetItemId(page, tid_data.ip_posid)); #if LANTERNDB_COPYNODES - BufferNode *buffNode; - buffNode = (BufferNode *)palloc(sizeof(BufferNode)); - buffNode->buf = (char *)palloc(nodepage->size); - memcpy(buffNode->buf, nodepage->node, nodepage->size); + char *buf_p = (char *)palloc(nodepage->size); + memcpy(buf_p, nodepage->node, nodepage->size); if(!idx_page_prelocked) { UnlockReleaseBuffer(buf); } - dlist_push_tail(&ctx->takenbuffers, &buffNode->node); - return buffNode->buf; + if(ctx->takenbuffers[ ctx->takenbuffers_next ]) { + pfree(ctx->takenbuffers[ ctx->takenbuffers_next ]); + } + + ctx->takenbuffers[ ctx->takenbuffers_next ] = buf_p; + ctx->takenbuffers_next = (ctx->takenbuffers_next + 1) % ctx->takenbuffers_size; + return buf_p; #endif // if we locked the page, unlock it and only leave a pin on it. // otherwise, it must must have been locked because we are in the middle of an update and that node // was affected, so we must leave it locked if(!idx_page_prelocked) { - // Wrap buf in a linked list node - BufferNode *buffNode; - buffNode = (BufferNode *)palloc(sizeof(BufferNode)); - buffNode->buf = buf; - - // Add buffNode to list of pinned buffers - dlist_push_tail(&ctx->takenbuffers, &buffNode->node); - LockBuffer(buf, BUFFER_LOCK_UNLOCK); + // Add buffer to the list of pinned buffers + if(ctx->takenbuffers[ ctx->takenbuffers_next ]) { + ReleaseBuffer(ctx->takenbuffers[ ctx->takenbuffers_next ]); + } + + ctx->takenbuffers[ ctx->takenbuffers_next ] = buf; + ctx->takenbuffers_next = (ctx->takenbuffers_next + 1) % ctx->takenbuffers_size; } #if PG_VERSION_NUM >= 130000 @@ -666,7 +670,6 @@ void *ldb_wal_index_node_retriever(void *ctxp, unsigned long long id) 0, "pinned more tuples during node retrieval than will fit in work_mem, cosider increasing work_mem"); #endif - // fa_cache_insert(&ctx->fa_cache, (uint32)id, nodepage->node); return nodepage->node; } diff --git a/src/hnsw/external_index.h b/src/hnsw/external_index.h index fbfa1a14f..a7ec982c8 100644 --- a/src/hnsw/external_index.h +++ b/src/hnsw/external_index.h @@ -11,9 +11,7 @@ #include "external_index_socket.h" #include "extra_dirtied.h" -#include "fa_cache.h" #include "hnsw.h" -#include "htab_cache.h" #include "options.h" #include "usearch.h" @@ -83,8 +81,6 @@ typedef struct HnswIndexTuple typedef struct { - HTABCache block_numbers_cache; - Relation index_rel; // used for inserts @@ -92,20 +88,14 @@ typedef struct ExtraDirtiedBufs *extra_dirted; - FullyAssociativeCache fa_cache; - - dlist_head takenbuffers; -} RetrieverCtx; - -typedef struct -{ #if LANTERNDB_COPYNODES - char *buf; + char *takenbuffers; #else - Buffer buf; + Buffer *takenbuffers; #endif - dlist_node node; -} BufferNode; + uint32 takenbuffers_size; + uint32 takenbuffers_next; +} RetrieverCtx; typedef struct { diff --git a/src/hnsw/insert.c b/src/hnsw/insert.c index 70f373c42..0df64f1d5 100644 --- a/src/hnsw/insert.c +++ b/src/hnsw/insert.c @@ -127,7 +127,7 @@ bool ldb_aminsert(Relation index, opts.quantization = usearch_scalar_b1_k; usearch_scalar = usearch_scalar_b1_k; } - opts.retriever_ctx = ldb_wal_retriever_area_init(index, hdr); + opts.retriever_ctx = ldb_wal_retriever_area_init(index, hdr, hdr->m); opts.retriever = ldb_wal_index_node_retriever; opts.retriever_mut = ldb_wal_index_node_retriever_mut; diff --git a/src/hnsw/retriever.c b/src/hnsw/retriever.c index a2db6a8ef..b598059ba 100644 --- a/src/hnsw/retriever.c +++ b/src/hnsw/retriever.c @@ -5,66 +5,48 @@ #include #include -#include // BLCKSZ +#include // Buffer #include #include #include "external_index.h" -#include "htab_cache.h" -#include "insert.h" -RetrieverCtx *ldb_wal_retriever_area_init(Relation index_rel, HnswIndexHeaderPage *header_page_under_wal) +RetrieverCtx *ldb_wal_retriever_area_init(Relation index_rel, HnswIndexHeaderPage *header_page_under_wal, uint32 m) { RetrieverCtx *ctx = palloc0(sizeof(RetrieverCtx)); ctx->index_rel = index_rel; ctx->header_page_under_wal = header_page_under_wal; ctx->extra_dirted = extra_dirtied_new(); - - fa_cache_init(&ctx->fa_cache); - - dlist_init(&ctx->takenbuffers); - - /* fill in a buffer with blockno index information, before spilling it to disk */ - ctx->block_numbers_cache = cache_create("BlockNumberCache"); + ctx->takenbuffers_size = m * 2; + ctx->takenbuffers_next = 0; +#if LANTERNDB_COPYNODES + ctx->takenbuffers = palloc0(sizeof(char *) * ctx->takenbuffers_size); +#else + ctx->takenbuffers = palloc0(sizeof(Buffer) * ctx->takenbuffers_size); +#endif return ctx; } void ldb_wal_retriever_area_reset(RetrieverCtx *ctx) { - dlist_mutable_iter miter; - dlist_foreach_modify(miter, &ctx->takenbuffers) - { - BufferNode *node = dlist_container(BufferNode, node, miter.cur); - if(node->buf != InvalidBuffer) { - ReleaseBuffer(node->buf); + for(uint32 i = 0; i < ctx->takenbuffers_size; i++) { + if(ctx->takenbuffers[ i ]) { +#if LANTERNDB_COPYNODES + pfree(ctx->takenbuffers[ i ]); + ctx->takenbuffers[ i ] = NULL; +#else + ReleaseBuffer(ctx->takenbuffers[ i ]); + ctx->takenbuffers[ i ] = 0; +#endif } - dlist_delete(miter.cur); - pfree(node); } - dlist_init(&ctx->takenbuffers); - - fa_cache_init(&ctx->fa_cache); + ctx->takenbuffers_next = 0; } void ldb_wal_retriever_area_fini(RetrieverCtx *ctx) { - cache_destroy(&ctx->block_numbers_cache); - dlist_mutable_iter miter; - dlist_foreach_modify(miter, &ctx->takenbuffers) - { - BufferNode *node = dlist_container(BufferNode, node, miter.cur); -#if LANTERNDB_COPYNODES - pfree(node->buf); -#else - if(node->buf != InvalidBuffer) { - ReleaseBuffer(node->buf); - } -#endif - dlist_delete(miter.cur); - pfree(node); - } - dlist_init(&ctx->takenbuffers); - + ldb_wal_retriever_area_reset(ctx); + pfree(ctx->takenbuffers); extra_dirtied_free(ctx->extra_dirted); } diff --git a/src/hnsw/retriever.h b/src/hnsw/retriever.h index d33aad609..babb44b9e 100644 --- a/src/hnsw/retriever.h +++ b/src/hnsw/retriever.h @@ -9,7 +9,7 @@ // this area is used to return pointers back to usearch -RetrieverCtx* ldb_wal_retriever_area_init(Relation index_rel, HnswIndexHeaderPage* header_page_under_wal); +RetrieverCtx* ldb_wal_retriever_area_init(Relation index_rel, HnswIndexHeaderPage* header_page_under_wal, uint32 m); // can be used after each usearch_search to tell the retriever that the pointers given out // will no longer be used void ldb_wal_retriever_area_reset(RetrieverCtx* ctx); diff --git a/src/hnsw/scan.c b/src/hnsw/scan.c index b36b98180..7a58e3bd0 100644 --- a/src/hnsw/scan.c +++ b/src/hnsw/scan.c @@ -28,11 +28,10 @@ IndexScanDesc ldb_ambeginscan(Relation index, int nkeys, int norderbys) int dimensions; usearch_error_t error = NULL; usearch_init_options_t opts; + RetrieverCtx *retriever_ctx; (void)CheckExtensionVersions(); - RetrieverCtx *retriever_ctx = ldb_wal_retriever_area_init(index, NULL); - scan = RelationGetIndexScan(index, nkeys, norderbys); // ** initialize usearch data structures and set up external retriever @@ -52,6 +51,7 @@ IndexScanDesc ldb_ambeginscan(Relation index, int nkeys, int norderbys) headerp = (HnswIndexHeaderPage *)PageGetContents(page); assert(headerp->magicNumber == LDB_WAL_MAGIC_NUMBER); + retriever_ctx = ldb_wal_retriever_area_init(index, NULL, headerp->m); // Initialize usearch index options based on params stored in our index header dimensions = headerp->vector_dim; @@ -279,6 +279,7 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir) scanstate->labels, scanstate->distances, &error); + ldb_wal_retriever_area_reset(scanstate->retriever_ctx); scanstate->count = num_returned; From 9206d5a637644a0c23abe22d13040a4d54bd0be4 Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Mon, 7 Oct 2024 14:25:17 +0400 Subject: [PATCH 2/3] fix takenbuffers type for LANTERNDB_COPYNODES path --- src/hnsw/external_index.c | 4 ++-- src/hnsw/external_index.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c index 31179d616..9e958ce9a 100644 --- a/src/hnsw/external_index.c +++ b/src/hnsw/external_index.c @@ -648,8 +648,7 @@ void *ldb_wal_index_node_retriever(void *ctxp, unsigned long long id) ctx->takenbuffers[ ctx->takenbuffers_next ] = buf_p; ctx->takenbuffers_next = (ctx->takenbuffers_next + 1) % ctx->takenbuffers_size; return buf_p; -#endif - +#else // if we locked the page, unlock it and only leave a pin on it. // otherwise, it must must have been locked because we are in the middle of an update and that node // was affected, so we must leave it locked @@ -662,6 +661,7 @@ void *ldb_wal_index_node_retriever(void *ctxp, unsigned long long id) ctx->takenbuffers[ ctx->takenbuffers_next ] = buf; ctx->takenbuffers_next = (ctx->takenbuffers_next + 1) % ctx->takenbuffers_size; } +#endif #if PG_VERSION_NUM >= 130000 CheckMem(work_mem, diff --git a/src/hnsw/external_index.h b/src/hnsw/external_index.h index a7ec982c8..9d7823c57 100644 --- a/src/hnsw/external_index.h +++ b/src/hnsw/external_index.h @@ -89,7 +89,7 @@ typedef struct ExtraDirtiedBufs *extra_dirted; #if LANTERNDB_COPYNODES - char *takenbuffers; + char **takenbuffers; #else Buffer *takenbuffers; #endif From 609070a9a14584d26f1237ba18b1ca2bad0e5878 Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Mon, 7 Oct 2024 14:49:30 +0400 Subject: [PATCH 3/3] increase takenbuffers size --- src/hnsw/retriever.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hnsw/retriever.c b/src/hnsw/retriever.c index b598059ba..c3deef987 100644 --- a/src/hnsw/retriever.c +++ b/src/hnsw/retriever.c @@ -17,7 +17,7 @@ RetrieverCtx *ldb_wal_retriever_area_init(Relation index_rel, HnswIndexHeaderPag ctx->index_rel = index_rel; ctx->header_page_under_wal = header_page_under_wal; ctx->extra_dirted = extra_dirtied_new(); - ctx->takenbuffers_size = m * 2; + ctx->takenbuffers_size = m * 5; ctx->takenbuffers_next = 0; #if LANTERNDB_COPYNODES ctx->takenbuffers = palloc0(sizeof(char *) * ctx->takenbuffers_size);