Skip to content

Commit 45ff941

Browse files
committed
Add adaptive replacement cache
The current basic block management consumes a significant amount of memory and causes unnecessary waste through frequent map allocation and release. Adaptive Replacement Cache (ARC) is a page replacement algorithm with better performance than least recently used (LRU). Once translated blocks are managed by ARC, better memory usage and hit rates can be achieved by tracking both frequently used and recently used pages, along with a recent eviction history for each. According to the cache information gathered while running CoreMark, the cache hit rate of ARC can reach over 99%.
1 parent a713b4c commit 45ff941

File tree

8 files changed

+776
-7
lines changed

8 files changed

+776
-7
lines changed

Makefile

+9
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,14 @@ gdbstub-test: $(BIN)
7474
$(Q)tests/gdbstub.sh && $(call notice, [OK])
7575
endif
7676

77+
# Import adaptive replacement cache
78+
ENABLE_ARCACHE ?= 1
79+
$(call set-feature, ARCACHE)
80+
81+
# Enable print cache information
82+
ENABLE_ARCACHE_INFO ?= 1
83+
$(call set-feature, ARCACHE_INFO)
84+
7785
# For tail-call elimination, we need a specific set of build flags applied.
7886
# FIXME: On macOS + Apple Silicon, -fno-stack-protector might have a negative impact.
7987
$(OUT)/emulate.o: CFLAGS += -fomit-frame-pointer -fno-stack-check -fno-stack-protector
@@ -93,6 +101,7 @@ OBJS := \
93101
emulate.o \
94102
riscv.o \
95103
elf.o \
104+
cache.o \
96105
$(OBJS_EXT) \
97106
main.o
98107

src/cache.c

+230
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
#include <inttypes.h>

#include "cache.h"
2+
3+
/* Generic min/max helpers.  Every argument occurrence is parenthesized so
 * that operands with lower-precedence operators (e.g. min(x & 3, 2))
 * expand correctly. */
#define min(a, b) (((a) < (b)) ? (a) : (b))
#define max(a, b) (((a) > (b)) ? (a) : (b))

/* Number of significant hash bits and the hash-table / cache capacity. */
#define BITS 10
#define SIZE 1024

/* Multiplicative hashing constant: 2^32 divided by the golden ratio. */
#define GOLDEN_RATIO_32 0x61C88647
/* Map a 32-bit key to a table slot.  The whole expansion is parenthesized
 * so HASH(x) composes safely inside larger expressions. */
#define HASH(val) ((((val) * GOLDEN_RATIO_32) >> (32 - BITS)) % SIZE)
9+
10+
/* A single cache entry.  Each entry lives simultaneously on one of the four
 * ARC lists (via `list`) and on a hash-table chain (via `ht_list`). */
typedef struct arc_entry {
    void *value;              /* opaque payload stored by the user */
    uint32_t key;             /* lookup key */
    arc_type_t arc_type;      /* which ARC list (T1/B1/T2/B2) holds the entry */
    struct list_head list;    /* node on the T1/B1/T2/B2 list */
    struct list_head ht_list; /* node on the hash-table chain */
} arc_entry_t;

/* Chained hash table: an array of SIZE list heads, indexed by HASH(key). */
typedef struct hashtable {
    struct list_head *ht_list_head;
} hashtable_t;
21+
22+
cache_t *cache_create()
23+
{
24+
cache_t *cache = (cache_t *) malloc(sizeof(cache_t));
25+
for (int i = 0; i < 4; i++) {
26+
cache->list_table[i] =
27+
(struct list_head *) malloc(sizeof(struct list_head));
28+
INIT_LIST_HEAD(cache->list_table[i]);
29+
cache->list_size[i] = 0;
30+
}
31+
cache->map = (hashtable_t *) malloc(sizeof(hashtable_t));
32+
cache->map->ht_list_head =
33+
(struct list_head *) malloc(SIZE * sizeof(struct list_head));
34+
35+
for (int i = 0; i < SIZE; i++) {
36+
INIT_LIST_HEAD(&cache->map->ht_list_head[i]);
37+
}
38+
39+
cache->c = SIZE;
40+
cache->p = SIZE / 2;
41+
#if RV32_HAS(ARCACHE_INFO)
42+
cache->get_time = 0;
43+
cache->hit_time = 0;
44+
#endif
45+
return cache;
46+
}
47+
48+
/* Destroy the cache and everything it owns.
 *
 * release_entry() is invoked on every stored value, including values still
 * referenced from the ghost lists B1/B2 (those values have not been handed
 * back to the caller yet).  The arc_entry_t nodes are freed here as well;
 * skipping that leaks one node per cached entry.
 */
void cache_free(cache_t *cache, void (*release_entry)(void *))
{
    for (int i = 0; i < 4; i++) {
        arc_entry_t *entry, *safe;
        /* _safe variant: we free the node while iterating */
        list_for_each_entry_safe(entry, safe, cache->list_table[i], list) {
            release_entry(entry->value);
            free(entry); /* fix: the entry node itself was never freed */
        }
        free(cache->list_table[i]);
    }
    free(cache->map->ht_list_head);
    free(cache->map);
    free(cache);
}
60+
61+
/* Rule of ARC
62+
* 1. size of T1 + size of T2 <= c
63+
* 2. size of T1 + size of B1 <= c
64+
* 3. size of T2 + size of B2 <= 2c
65+
* 4. size of T1 + size of T2 + size of B1 + size of B2 <= 2c
66+
*/
67+
#if RV32_HAS(ARCACHE_INFO)
/* Check the four ARC size invariants:
 *   1. |T1| + |T2| <= c
 *   2. |T1| + |B1| <= c
 *   3. |T2| + |B2| <= 2c
 *   4. |T1| + |T2| + |B1| + |B2| <= 2c
 */
void assert_cache(cache_t *cache)
{
    const uint32_t t1 = cache->list_size[T1], b1 = cache->list_size[B1],
                   t2 = cache->list_size[T2], b2 = cache->list_size[B2];
    assert(t1 + t2 <= cache->c);
    assert(t1 + b1 <= cache->c);
    assert(t2 + b2 <= 2 * cache->c);
    assert(t1 + t2 + b1 + b2 <= 2 * cache->c);
}
#endif
78+
79+
/* Re-tag `entry` as belonging to `arc_type` and relink it at the MRU (head)
 * position of that list, keeping the per-list size counters in sync.
 * NOTE(review): only used inside this file — could likely be static. */
void move_to_mru(cache_t *cache, arc_entry_t *entry, const arc_type_t arc_type)
{
    const arc_type_t from = entry->arc_type;
    cache->list_size[from]--;
    entry->arc_type = arc_type;
    cache->list_size[arc_type]++;
    list_move(&entry->list, cache->list_table[arc_type]);
}
86+
87+
/* Demote the LRU (tail) element of T1 into the ghost list B1 once T1 has
 * reached its adaptive target size p; otherwise do nothing. */
void replaceT1(cache_t *cache)
{
    if (cache->list_size[T1] < cache->p)
        return;
    arc_entry_t *lru =
        list_last_entry(cache->list_table[T1], arc_entry_t, list);
    move_to_mru(cache, lru, B1);
}
94+
/* Demote the LRU (tail) element of T2 into the ghost list B2 once T2 has
 * reached its share of the capacity (c - p); otherwise do nothing. */
void replaceT2(cache_t *cache)
{
    if (cache->list_size[T2] < (cache->c - cache->p))
        return;
    arc_entry_t *lru =
        list_last_entry(cache->list_table[T2], arc_entry_t, list);
    move_to_mru(cache, lru, B2);
}
101+
102+
/* Look up `key` and apply the ARC state transition for the list it was
 * found on.
 *
 * Hits in T1/T2 are promoted to the MRU position of T2; ghost hits in
 * B1/B2 first tune the adaptive parameter p toward recency (B1) or
 * frequency (B2).  Returns the stored value, or NULL when the key is not
 * present.
 */
void *cache_get(cache_t *cache, uint32_t key)
{
    if (cache->c <= 0 || list_empty(&cache->map->ht_list_head[HASH(key)]))
        return NULL;

    /* Scan the hash chain.  Using a separate iterator avoids reading
     * entry->key out of the list-head sentinel when the key is absent. */
    arc_entry_t *entry = NULL, *iter;
    list_for_each_entry(iter, &cache->map->ht_list_head[HASH(key)], ht_list)
    {
        if (iter->key == key) {
            entry = iter;
            break;
        }
    }
#if RV32_HAS(ARCACHE_INFO)
    cache->get_time++;
#endif
    if (!entry)
        return NULL;

    /* Dispatch on the list the entry was found on.  A plain if-chain would
     * re-match the T2 case right after the T1 promotion (move_to_mru
     * rewrites arc_type), double-counting the hit and moving twice. */
    switch (entry->arc_type) {
    case T1: /* recent hit: promote into the frequency list */
#if RV32_HAS(ARCACHE_INFO)
        cache->hit_time++;
#endif
        replaceT2(cache);
        move_to_mru(cache, entry, T2);
        break;
    case T2: /* frequent hit: refresh MRU position */
#if RV32_HAS(ARCACHE_INFO)
        cache->hit_time++;
#endif
        move_to_mru(cache, entry, T2);
        break;
    case B1: /* ghost hit: favor recency by growing p */
        cache->p = min(cache->p + 1, cache->c);
        replaceT2(cache);
        move_to_mru(cache, entry, T2);
        break;
    case B2: /* ghost hit: favor frequency by shrinking p */
        /* p is unsigned: max(p - 1, 0) would wrap to UINT32_MAX at p == 0 */
        cache->p = cache->p ? cache->p - 1 : 0;
        replaceT1(cache);
        move_to_mru(cache, entry, T2);
        break;
    }
#if RV32_HAS(ARCACHE_INFO)
    assert_cache(cache);
#endif
    return entry->value;
}
154+
155+
void *cache_put(cache_t *cache, uint32_t key, void *value)
156+
{
157+
#if RV32_HAS(ARCACHE_INFO)
158+
cache->get_time++;
159+
#endif
160+
void *delete_value = NULL;
161+
#if RV32_HAS(ARCACHE_INFO)
162+
assert(cache->list_size[T1] + cache->list_size[B1] <= cache->c);
163+
#endif
164+
/* Before adding new element to cach, we should check the status
165+
* of cache.
166+
*/
167+
if ((cache->list_size[T1] + cache->list_size[B1]) == cache->c) {
168+
if (cache->list_size[T1] < cache->c) {
169+
arc_entry_t *delete_target =
170+
list_last_entry(cache->list_table[B1], arc_entry_t, list);
171+
list_del_init(&delete_target->list);
172+
list_del_init(&delete_target->ht_list);
173+
delete_value = delete_target->value;
174+
free(delete_target);
175+
cache->list_size[B1]--;
176+
replaceT1(cache);
177+
} else {
178+
arc_entry_t *delete_target =
179+
list_last_entry(cache->list_table[T1], arc_entry_t, list);
180+
list_del_init(&delete_target->list);
181+
list_del_init(&delete_target->ht_list);
182+
delete_value = delete_target->value;
183+
free(delete_target);
184+
cache->list_size[T1]--;
185+
}
186+
} else {
187+
#if RV32_HAS(ARCACHE_INFO)
188+
assert(cache->list_size[T1] + cache->list_size[B1] < cache->c);
189+
#endif
190+
uint32_t size = cache->list_size[T1] + cache->list_size[B1] +
191+
cache->list_size[T2] + cache->list_size[B2];
192+
if (size == cache->c * 2) {
193+
arc_entry_t *delete_target =
194+
list_last_entry(cache->list_table[B2], arc_entry_t, list);
195+
list_del_init(&delete_target->list);
196+
list_del_init(&delete_target->ht_list);
197+
delete_value = delete_target->value;
198+
free(delete_target);
199+
cache->list_size[B2]--;
200+
}
201+
if (cache->list_size[T1] + cache->list_size[T2] >= cache->c &&
202+
cache->list_size[T1] < cache->p)
203+
replaceT2(cache);
204+
else
205+
replaceT1(cache);
206+
}
207+
arc_entry_t *new_entry = (arc_entry_t *) malloc(sizeof(arc_entry_t));
208+
new_entry->key = key;
209+
new_entry->value = value;
210+
new_entry->arc_type = T1;
211+
list_add(&new_entry->list, cache->list_table[T1]);
212+
list_add(&new_entry->ht_list, &cache->map->ht_list_head[HASH(key)]);
213+
cache->list_size[T1]++;
214+
#if RV32_HAS(ARCACHE_INFO)
215+
assert_cache(cache);
216+
#endif
217+
return delete_value;
218+
}
219+
220+
#if RV32_HAS(ARCACHE_INFO)
/* Print the request/hit counters and the resulting hit ratio to stdout. */
void cache_print_stats(cache_t *cache)
{
    /* Guard against dividing by zero when no request was recorded. */
    const double ratio =
        cache->get_time
            ? cache->hit_time * 100 / (double) cache->get_time
            : 0.0;
    /* PRIu64 instead of "%lu": uint64_t is not `unsigned long` on every
     * target (e.g. 32-bit or LLP64), and a mismatched specifier is UB. */
    printf("requests: %12" PRIu64 " \n"
           "hits: %12" PRIu64 " \n"
           "ratio: %lf%%\n",
           cache->get_time, cache->hit_time, ratio);
}
#endif

src/cache.h

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/* Adaptive Replacement Cache (ARC) for translated basic blocks. */
#pragma once

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* feature.h provides RV32_HAS(), used below; including it here keeps this
 * header self-contained. */
#include "feature.h"
#include "list.h"

/*
 * T1: LRU List
 * T2: LFU List
 * B1: LRU Ghost List
 * B2: LFU Ghost List
 */
typedef enum { T1, B1, T2, B2 } arc_type_t;

struct hashtable;

typedef struct cache {
    struct list_head *list_table[4]; /* heads of the T1/B1/T2/B2 lists */
    uint32_t list_size[4];           /* current length of each list */
    struct hashtable *map;           /* key -> entry lookup table */
    uint32_t c;                      /* total cache capacity */
    uint32_t p;                      /* adaptive target size of T1 */
#if RV32_HAS(ARCACHE_INFO)
    uint64_t get_time; /* number of lookups/insertions recorded */
    uint64_t hit_time; /* number of hits in T1/T2 */
#endif
} cache_t;

/* Create and initialize an empty cache. */
cache_t *cache_create(void);

/* Destroy the cache; release_entry() is called on every stored value. */
void cache_free(cache_t *cache, void (*release_entry)(void *));

/* Look up key; returns the stored value, or NULL on a miss. */
void *cache_get(cache_t *cache, uint32_t key);

/* Insert value under key.  May return the value of an evicted entry, which
 * the caller now owns and must release; returns NULL otherwise. */
void *cache_put(cache_t *cache, uint32_t key, void *value);

#if RV32_HAS(ARCACHE_INFO)
/* Print request/hit counters and the hit ratio to stdout. */
void cache_print_stats(cache_t *cache);
#endif

src/emulate.c

+20-5
Original file line numberDiff line numberDiff line change
@@ -1250,7 +1250,7 @@ static bool insn_is_branch(uint8_t opcode)
12501250
}
12511251
return false;
12521252
}
1253-
1253+
#if !RV32_HAS(ARCACHE)
12541254
/* hash function is used when mapping address into the block map */
12551255
static uint32_t hash(size_t k)
12561256
{
@@ -1262,7 +1262,7 @@ static uint32_t hash(size_t k)
12621262
#endif
12631263
return k;
12641264
}
1265-
1265+
#endif
12661266
/* allocate a basic block */
12671267
static block_t *block_alloc(const uint8_t bits)
12681268
{
@@ -1273,7 +1273,7 @@ static block_t *block_alloc(const uint8_t bits)
12731273
block->ir = malloc(block->insn_capacity * sizeof(rv_insn_t));
12741274
return block;
12751275
}
1276-
1276+
#if !RV32_HAS(ARCACHE)
12771277
/* insert a block into block map */
12781278
static void block_insert(block_map_t *map, const block_t *block)
12791279
{
@@ -1309,7 +1309,7 @@ static block_t *block_find(const block_map_t *map, const uint32_t addr)
13091309
}
13101310
return NULL;
13111311
}
1312-
1312+
#endif
13131313
static void block_translate(riscv_t *rv, block_t *block)
13141314
{
13151315
block->pc_start = block->pc_end = rv->PC;
@@ -1342,24 +1342,39 @@ static void block_translate(riscv_t *rv, block_t *block)
13421342

13431343
static block_t *block_find_or_translate(riscv_t *rv, block_t *prev)
13441344
{
1345+
#if RV32_HAS(ARCACHE)
1346+
/* lookup the next block in the block cache */
1347+
block_t *next = (block_t *) cache_get(rv->cache, rv->PC);
1348+
#else
13451349
block_map_t *map = &rv->block_map;
13461350
/* lookup the next block in the block map */
13471351
block_t *next = block_find(map, rv->PC);
1348-
1352+
#endif
13491353
if (!next) {
1354+
#if !RV32_HAS(ARCACHE)
13501355
if (map->size * 1.25 > map->block_capacity) {
13511356
block_map_clear(map);
13521357
prev = NULL;
13531358
}
1359+
#endif
13541360

13551361
/* allocate a new block */
13561362
next = block_alloc(10);
13571363

13581364
/* translate the basic block */
13591365
block_translate(rv, next);
13601366

1367+
#if RV32_HAS(ARCACHE)
1368+
/* insert the block into block cache */
1369+
block_t *delete_target = cache_put(rv->cache, rv->PC, &(*next));
1370+
if (delete_target) {
1371+
free(delete_target->ir);
1372+
free(delete_target);
1373+
}
1374+
#else
13611375
/* insert the block into block map */
13621376
block_insert(&rv->block_map, next);
1377+
#endif
13631378

13641379
/* update the block prediction
13651380
* When we translate a new block, the block predictor may benefit,

src/feature.h

+10
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,16 @@
4848
#define RV32_FEATURE_GDBSTUB 1
4949
#endif
5050

51+
/* Import adaptive replacement cache to manage block */
52+
#ifndef RV32_FEATURE_ARCACHE
53+
#define RV32_FEATURE_ARCACHE 1
54+
#endif
55+
56+
/* Print cache information */
57+
#ifndef RV32_FEATURE_ARCACHE_INFO
58+
#define RV32_FEATURE_ARCACHE_INFO 1
59+
#endif
60+
5161
/* Feature test macro */
5262
#define RV32_HAS(x) RV32_FEATURE_##x
5363

0 commit comments

Comments
 (0)