-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathCuckooMap.h
425 lines (385 loc) · 13.2 KB
/
CuckooMap.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
#ifndef CUCKOO_MAP_H
#define CUCKOO_MAP_H 1
#include <iostream>
#include <memory>
#include <mutex>
#include <vector>
#include "InternalCuckooMap.h"
// In the following template:
// Key is the key type, it must be copyable and movable, furthermore, Key
// must be default constructible (without arguments) as empty and
// must have an empty() method to indicate that the instance is
// empty. If using fasthash64 on all bytes of the object is not
// a suitable hash function, one has to instanciate the template
// with two hash function types as 3rd and 4th argument. If
// std::equal_to<Key> is not implemented or does not behave
// correctly, one has to supply a comparison class as well.
// Value is the value type, it is not actually used anywhere in the
// template except as Value* for input and output of values. The
// template parameter basically only serves as a convenience to
// provide defaults for valueAlign and valueSize and to reduce
// casts. Values are passed in and out as a Value* to allow for
// runtime configuration of the byte size and alignment. Within the
// table no constructors or destructors or assignment operators are
// called for Value, the data is only copied with std::memcpy. So Value
// must only contain POD!
// This class is thread safe and can safely be used from multiple threads.
// Mutexes are built in, note that a lookup returns a `Finding` object which
// keeps a mutex until it is destroyed. This for example allows to change
// values that are actually currently stored in the map. Keys must only be
// changed as long as their hash and fingerprint does not change!
template <class Key, class Value,
class HashKey1 = HashWithSeed<Key, 0xdeadbeefdeadbeefULL>,
class HashKey2 = HashWithSeed<Key, 0xabcdefabcdef1234ULL>,
class CompKey = std::equal_to<Key>,
class HashShort = HashWithSeed<uint16_t, 0xfedcbafedcba4321ULL>>
class CuckooMap {
public:
typedef Key KeyType; // these are for ShardedMap
typedef Value ValueType;
typedef HashKey1 HashKey1Type;
typedef HashKey2 HashKey2Type;
typedef CompKey CompKeyType;
typedef InternalCuckooMap<Key, Value, HashKey1, HashKey2, CompKey> Subtable;
typedef CuckooFilter<Key, HashKey1, HashKey2, HashShort, CompKey> Filter;
private:
size_t _firstSize;
size_t _valueSize;
size_t _valueAlign;
CompKey _compKey;
public:
CuckooMap(size_t firstSize, size_t valueSize = sizeof(Value),
size_t valueAlign = alignof(Value), bool useFilters = false)
: _firstSize(firstSize),
_valueSize(valueSize),
_valueAlign(valueAlign),
_randState(0x2636283625154737ULL),
_dummyFilter(false, 0),
_nrUsed(0),
_useFilters(useFilters) {
auto t = new Subtable(false, firstSize, valueSize, valueAlign);
try {
_tables.emplace_back(t);
} catch (...) {
delete t;
throw;
}
if (_useFilters) {
auto f = new Filter(false, _tables.back()->capacity());
try {
_filters.emplace_back(f);
} catch (...) {
delete f;
throw;
}
}
}
struct Finding {
// This struct has two different duties: First it represents a guard
// for the _mutex of a CuckooMap. Secondly, it indicates what the
// result of a lookup was and allows subsequently to modify key
// (provided the hash values remain the same) and the value. This means
// that the Finding object can have a current pair or not. If it has a
// crrent key/value pair, one can remove it and one can lookup
// further keys or insert pairs whilst holding the mutex. The fact
// whether or not a key was found is indicated by key() returning
// a non-null pointer or nullptr. The instances releases the mutex
// at destruction, if _map is not a nullptr.
friend class CuckooMap;
Key* key() const { return _key; }
Value* value() const { return _value; }
private:
Key* _key;
Value* _value;
CuckooMap* _map;
int32_t _layer;
public:
Finding(Key* k, Value* v, CuckooMap* m, int32_t l)
: _key(k), _value(v), _map(m), _layer(l) {}
constexpr Finding() noexcept : _key(nullptr), _value(nullptr), _map(nullptr), _layer(-1) {}
~Finding() {
if (_map != nullptr) {
_map->release();
}
}
Finding(Finding const& other) = delete;
Finding& operator=(Finding const& other) = delete;
// Allow moving, we need this to allow for copy elision where we
// return by value:
public:
Finding(Finding&& other) {
std::cout << "move" << std::endl;
if (_map != nullptr) {
_map->release();
}
_map = other._map;
other._map = nullptr;
_key = other._key;
_value = other._value;
_layer = other._layer;
other._key = nullptr;
}
Finding& operator=(Finding&& other) {
std::cout << "move assign" << std::endl;
if (_map != nullptr) {
_map->release();
}
_map = other._map;
other._map = nullptr;
_key = other._key;
_value = other._value;
_layer = other._layer;
other._key = nullptr;
return *this;
}
// Return 1 if something was found and 0 otherwise. If this returns 0,
// then key() and value() are undefined.
int32_t found() const { return (_map != nullptr && _key != nullptr) ? 1 : 0; }
// The following are only relevant for CuckooMultiMaps, we add the method
// here to keep the API consistent.
bool next() { return false; }
bool get(int32_t pos) { return false; }
};
Finding lookup(Key const& k) {
// look up a key, return an object describing the findings. If
// this->found() returns 0 then no pair with key k was found and the
// pointers are undefined. If this->found() is a non-negative value,
// then pointers key and value are set to point to the pair in the
// table and one may modify *k and *v, provided the hash values of
// *k do not change. Note that all accesses to the table are blocked
// as long as the resulting object stays alive.
// Usage example:
// Key k;
// { // this scope is necessary to unlock the table
// auto res = lookup(k);
// if (res.found() > 0) {
// // work with *res.key() and *res.value()
// }
// }
MyMutexGuard guard(_mutex);
Finding f(nullptr, nullptr, this, -1);
innerLookup(k, f, true);
guard.release();
return f;
}
bool lookup(Key const& k, Finding& f) {
if (f._map != this) {
f._map->_mutex.unlock();
f._map = this;
_mutex.lock();
}
f._key = nullptr;
innerLookup(k, f, true);
return f.found() > 0;
}
bool insert(Key const& k, Value const* v) {
// inserts a pair (k, v) into the table
// returns true if the insertion took place and false if there was
// already a pair with the same key k in the table, in which case
// the table is unchanged.
MyMutexGuard guard(_mutex);
return innerInsert(k, v, nullptr, -1);
}
bool insert(Key const& k, Value const* v, Finding& f) {
if (f._map != this) {
f._map->_mutex.unlock();
f._map = this;
_mutex.lock();
}
bool res = innerInsert(k, v, nullptr, -1);
f._key = nullptr;
return res;
}
bool remove(Key const& k) {
// remove the pair with key k, if one is in the table. Return true if
// a pair was removed and false otherwise.
MyMutexGuard guard(_mutex);
Finding f(nullptr, nullptr, this, -1);
innerLookup(k, f, false);
guard.release();
if (f.found() == 0) {
return false;
}
innerRemove(f);
return true;
}
bool remove(Finding& f) {
if (f._map != this) {
f._map->_mutex.unlock();
f._map = this;
_mutex.lock();
}
if (f._key == nullptr) {
return false;
}
innerRemove(f);
return true;
}
uint64_t nrUsed() const {
MyMutexGuard guard(_mutex);
return _nrUsed;
}
private:
void innerLookup(Key const& k, Finding& f, bool moveToFront) {
char buffer[_valueSize];
// f must be initialized with _key == nullptr
for (int32_t layer = 0; static_cast<uint32_t>(layer) < _tables.size();
++layer) {
Subtable& sub = *_tables[layer];
Filter& filter = _useFilters ? *_filters[layer] : _dummyFilter;
Key* key;
Value* value;
bool found = _useFilters ? (filter.lookup(k) && sub.lookup(k, key, value))
: sub.lookup(k, key, value);
if (found) {
f._key = key;
f._value = value;
f._layer = layer;
if (moveToFront && layer > 0) {
uint8_t fromBack = _tables.size() - layer;
uint8_t denominator = (fromBack >= 6) ? (2 << 6) : (2 << fromBack);
uint8_t mask = denominator - 1;
uint8_t r = pseudoRandomChoice();
if ((r & mask) == 0) {
Key kCopy = *key;
memcpy(buffer, value, _valueSize);
Value* vCopy = reinterpret_cast<Value*>(&buffer);
innerRemove(f);
innerInsert(kCopy, vCopy, &f, layer - 1);
}
}
return;
};
}
}
bool innerInsert(Key const& k, Value const* v, Finding* f, int layerHint) {
// inserts a pair (k, v) into the table
// returns true if the insertion took place and false if there was
// already a pair with the same key k in the table, in which case
// the table is unchanged.
Key kCopy = k;
Key originalKey = k;
Key originalKeyAtLayer = k;
char buffer[_valueSize];
memcpy(buffer, v, _valueSize);
Value* vCopy = reinterpret_cast<Value*>(&buffer);
int32_t lastLayer = _tables.size() - 1;
int32_t layer = (layerHint < 0) ? lastLayer : layerHint;
int res;
bool filterRes;
bool somethingExpunged = true;
while (static_cast<uint32_t>(layer) < _tables.size()) {
Subtable& sub = *_tables[layer];
Filter& filter = _useFilters ? *_filters[layer] : _dummyFilter;
int maxRounds = (layerHint < 0) ? 128 : 4;
for (int i = 0; i < maxRounds; ++i) {
if (f != nullptr && _compKey(originalKey, kCopy)) {
res = sub.insert(kCopy, vCopy, &(f->_key), &(f->_value));
f->_layer = layer;
} else {
res = sub.insert(kCopy, vCopy, nullptr, nullptr);
}
if (res < 0) { // key is already in the table
return false;
} else if (res == 0) {
if (_useFilters) {
filterRes = filter.insert(originalKeyAtLayer);
if (!filterRes) {
throw;
}
}
++_nrUsed;
somethingExpunged = false;
break;
}
}
// check if table is too full; if so, expunge a random element
if (!somethingExpunged && sub.overfull()) {
bool expunged = sub.expungeRandom(kCopy, vCopy);
if (!expunged) {
throw;
}
somethingExpunged = true;
}
if (somethingExpunged) {
if (_useFilters && !_compKey(kCopy, originalKeyAtLayer)) {
filterRes = filter.remove(kCopy);
if (!filterRes) {
throw;
}
filterRes = filter.insert(originalKeyAtLayer);
if (!filterRes) {
throw;
}
originalKeyAtLayer = kCopy;
}
layer = (layer == lastLayer) ? layer + 1 : lastLayer;
} else {
return true;
}
}
// If we get here, then some pair has been expunged from all tables and
// we have to append a new table:
uint64_t lastSize = _tables.back()->capacity();
/*std::cout << "Insertion failure at level " << _tables.size() - 1 << " at
"
<< 100.0 *
(((double)_tables.back()->nrUsed()) / ((double)lastSize))
<< "% capacity with cold " << coldInsert << std::endl;*/
bool useMmap = (_tables.size() >= 3);
auto t = new Subtable(useMmap, lastSize * 4, _valueSize, _valueAlign);
try {
_tables.emplace_back(t);
} catch (...) {
delete t;
throw;
}
if (_useFilters) {
auto fil = new Filter(useMmap, lastSize * 4);
try {
_filters.emplace_back(fil);
} catch (...) {
delete f;
throw;
}
}
originalKeyAtLayer = kCopy;
while (res > 0) {
if (f != nullptr && _compKey(originalKey, kCopy)) {
res = _tables.back()->insert(kCopy, vCopy, &(f->_key), &(f->_value));
f->_layer = layer;
} else {
res = _tables.back()->insert(kCopy, vCopy, nullptr, nullptr);
}
}
if (_useFilters) {
filterRes = _filters.back()->insert(originalKeyAtLayer);
if (!filterRes) {
throw;
}
}
++_nrUsed;
return true;
}
uint8_t pseudoRandomChoice() {
_randState = _randState * 997 + 17; // ignore overflows
return static_cast<uint8_t>((_randState >> 37) & 0xff);
}
void release() { _mutex.unlock(); }
void innerRemove(Finding& f) {
if (_useFilters) {
_filters[f._layer]->remove(*(f._key));
}
_tables[f._layer]->remove(f._key, f._value);
f._key = nullptr;
--_nrUsed;
}
uint64_t _randState; // pseudo random state for move-to-front heuristic
std::vector<std::unique_ptr<Subtable>> _tables;
std::vector<std::unique_ptr<Filter>> _filters;
Filter _dummyFilter;
std::mutex _mutex;
uint64_t _nrUsed;
bool _useFilters;
};
#endif