forked from ShawnZhong/MadFS
-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathlog_entry.h
More file actions
167 lines (152 loc) · 6.01 KB
/
log_entry.h
File metadata and controls
167 lines (152 loc) · 6.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#pragma once
#include "alloc/block.h"
#include "cursor/log.h"
#include "idx.h"
namespace madfs::dram {
class LogEntryAllocator {
BlockAllocator* block_allocator;
MemTable* mem_table;
// the current in-use log entry block
pmem::LogEntryBlock* curr_log_block{nullptr};
LogicalBlockIdx curr_log_block_idx{0};
LogLocalOffset curr_log_offset{0}; // offset of the next available byte
public:
LogEntryAllocator(BlockAllocator* block_allocator, MemTable* mem_table)
: block_allocator(block_allocator), mem_table(mem_table) {}
/**
* populate log entries required by a single transaction; do persist but not
* fenced
*
* @param op operation code, e.g., LOG_OVERWRITE
* @param leftover_bytes remaining empty bytes in the last block
* @param num_blocks total number blocks touched
* @param begin_vidx start of virtual index
* @param begin_lidxs ordered list of logical indices for each chunk of
* virtual index
* @return a cursor pointing to the first log entry
*/
LogCursor append(pmem::LogEntry::Op op, uint16_t leftover_bytes,
uint32_t num_blocks, VirtualBlockIdx begin_vidx,
const std::vector<LogicalBlockIdx>& begin_lidxs) {
const LogCursor head = this->alloc(num_blocks);
LogCursor log_cursor = head;
// i to iterate through begin_lidxs across entries
// j to iterate within each entry
uint32_t i, j;
i = 0;
while (true) {
log_cursor->op = op;
log_cursor->begin_vidx = begin_vidx;
for (j = 0; j < log_cursor->get_lidxs_len(); ++j)
log_cursor->begin_lidxs[j] = begin_lidxs[i + j];
if (log_cursor->has_next) {
log_cursor->leftover_bytes = 0;
log_cursor->persist();
i += j;
begin_vidx += (j << BITMAP_ENTRY_BLOCKS_CAPACITY_SHIFT);
log_cursor.advance(mem_table);
} else { // last entry
log_cursor->leftover_bytes = leftover_bytes;
log_cursor->persist();
break;
}
}
return head;
}
private:
/**
* Allocate a linked list of log entry that could fit a mapping of the given
* length
*
* @param num_blocks how long this mapping should be
* @return a log cursor pointing to the first log entry
*/
LogCursor alloc(uint32_t num_blocks) {
// for a log entry with only one logical block index, it takes 16 bytes
// if smaller than that, do not try to allocate log entry there
constexpr uint32_t min_required_size =
pmem::LogEntry::FIXED_SIZE + sizeof(LogicalBlockIdx);
if (curr_log_block_idx == 0 ||
BLOCK_SIZE - curr_log_offset < min_required_size) {
// no enough space left, do block allocation
curr_log_block_idx = block_allocator->alloc(1);
curr_log_block =
&mem_table->lidx_to_addr_rw(curr_log_block_idx)->log_entry_block;
curr_log_offset = 0;
}
LogEntryIdx first_idx = {curr_log_block_idx, curr_log_offset};
pmem::LogEntryBlock* first_block = curr_log_block;
pmem::LogEntry* first_entry = curr_log_block->get(curr_log_offset);
pmem::LogEntry* curr_entry = first_entry;
uint32_t needed_lidxs_cnt =
ALIGN_UP(num_blocks, BITMAP_ENTRY_BLOCKS_CAPACITY) >>
BITMAP_ENTRY_BLOCKS_CAPACITY_SHIFT;
while (true) {
assert(curr_entry);
curr_log_offset += pmem::LogEntry::FIXED_SIZE;
uint32_t avail_lidxs_cnt =
(BLOCK_SIZE - curr_log_offset) / sizeof(LogicalBlockIdx);
assert(avail_lidxs_cnt > 0);
if (needed_lidxs_cnt <= avail_lidxs_cnt) {
curr_entry->has_next = false;
curr_entry->num_blocks = num_blocks;
curr_log_offset += needed_lidxs_cnt * sizeof(LogicalBlockIdx);
return {first_idx, first_block};
}
curr_entry->has_next = true;
curr_entry->num_blocks = avail_lidxs_cnt
<< BITMAP_ENTRY_BLOCKS_CAPACITY_SHIFT;
curr_log_offset += avail_lidxs_cnt * sizeof(LogicalBlockIdx);
needed_lidxs_cnt -= avail_lidxs_cnt;
num_blocks -= curr_entry->num_blocks;
assert(curr_log_offset <= BLOCK_SIZE);
if (BLOCK_SIZE - curr_log_offset < min_required_size) {
curr_log_block_idx = block_allocator->alloc(1);
curr_log_block =
&mem_table->lidx_to_addr_rw(curr_log_block_idx)->log_entry_block;
curr_log_offset = 0;
curr_entry->is_next_same_block = false;
curr_entry->next.block_idx = curr_log_block_idx;
} else {
curr_entry->is_next_same_block = true;
curr_entry->next.local_offset = curr_log_offset;
}
curr_entry = curr_log_block->get(curr_log_offset);
}
}
public:
/**
* a log entry is discarded because reset_log_entry() is called before commit;
* the uncommitted entry must be (semi-)freed to prevent memory leak
*
* return log entry blocks that are exclusively taken by this log entry; if
* there is other log entries on this block, leave this block alone
*/
void free(LogCursor log_cursor) {
// NOTE: we assume free() will always keep the block in the local free list
// instead of publishing it immediately. this makes it safe to read
// the log entry block even after calling free()
// do we need to free the first le block? no if there is other le ahead
if (log_cursor.idx.local_offset == 0)
block_allocator->free(log_cursor.idx.block_idx);
LogicalBlockIdx prev_block_idx = log_cursor.idx.block_idx;
while (log_cursor.advance(mem_table)) {
if (prev_block_idx != log_cursor.idx.block_idx) {
prev_block_idx = log_cursor.idx.block_idx;
block_allocator->free(prev_block_idx);
}
}
}
/**
* when moving into a new tx block, reset states associated with log entry
* allocation so that the next time calling alloc_log_entry will allocate from
* a new log entry block
*/
void reset() {
// this is to trigger new log entry block allocation
curr_log_block_idx = 0;
// technically, setting curr_log_block and curr_log_offset are unnecessary
// because they are guarded by setting curr_log_block_idx zero
}
};
} // namespace madfs::dram