
Commit 3c61529

GONG, Ruiqi authored and tehcaster committed
Randomized slab caches for kmalloc()
When exploiting memory vulnerabilities, "heap spraying" is a common technique targeting those related to dynamic memory allocation (i.e. the "heap"), and it plays an important role in a successful exploitation. Basically, it overwrites the memory area of the vulnerable object by triggering allocations in other subsystems or modules, thereby getting a reference to the targeted memory location. It's usable against various types of vulnerability, including use-after-free (UAF), heap out-of-bounds write, etc.

There are (at least) two reasons why the heap can be sprayed: 1) generic slab caches are shared among different subsystems and modules, and 2) dedicated slab caches could be merged with the generic ones. Currently these two factors cannot be prevented at a low cost: the first is a widely used memory allocation mechanism, and shutting down slab merging completely via `slub_nomerge` would be overkill.

To efficiently prevent heap spraying, we propose the following approach: create multiple copies of the generic slab caches that will never be merged, and pick a random one of them at allocation. The random selection is based on the address of the code that calls `kmalloc()`, which means it is static at runtime (rather than determined anew at each allocation, which could be bypassed by repeated spraying in brute force). In other words, the randomness of cache selection is with respect to the code address rather than time, i.e. allocations in different code paths will most likely pick different caches, although kmalloc() at any given place uses the same cache copy whenever it is executed. This way, the vulnerable object and the memory allocated in other subsystems and modules will (most probably) end up in different slab caches, which prevents the object from being sprayed.

Meanwhile, the static random selection is further enhanced with a per-boot random seed, which prevents the attacker from finding a usable kmalloc() that happens to pick the same cache as the vulnerable subsystem/module by analyzing the open source code. In other words, with the per-boot seed, the random selection is static within each run of the system, but differs across system startups.
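For illustration only, the following user-space sketch mimics the selection described above: the caller's code address is mixed with the per-boot seed and hashed down to one of the 16 normal-kmalloc cache copies, so each call site maps to a fixed copy for the lifetime of a boot. The helper name pick_cache_copy(), the sample addresses, and the stand-in seed are invented for this example; the multiplier is the GOLDEN_RATIO_64 constant that the kernel's hash_64() uses.

/*
 * Illustrative sketch only -- not kernel code.  It mimics how
 * kmalloc_type() derives a cache copy from the call site: hash the
 * caller's return address XOR'ed with the per-boot seed down to
 * ilog2(16) = 4 bits.
 */
#include <stdint.h>
#include <stdio.h>

#define GOLDEN_RATIO_64 0x61C8864680B583EBull  /* as in include/linux/hash.h */

static uint64_t random_kmalloc_seed;    /* in the kernel: filled from get_random_u64() at boot */

static unsigned int pick_cache_copy(uint64_t caller)
{
        /* hash_64(caller ^ seed, 4): multiply and keep the top 4 bits */
        return (unsigned int)(((caller ^ random_kmalloc_seed) * GOLDEN_RATIO_64) >> 60);
}

int main(void)
{
        random_kmalloc_seed = 0x1234abcd5678ef90ull;    /* stand-in for the boot-time seed */

        /* two distinct call sites (most likely) map to different cache copies */
        printf("call site A -> kmalloc-rnd copy %u\n", pick_cache_copy(0xffffffff81a2b3c4ull));
        printf("call site B -> kmalloc-rnd copy %u\n", pick_cache_copy(0xffffffff8245f6e8ull));
        return 0;
}

With the real seed drawn at boot, the mapping from call site to cache copy changes from one boot to the next but stays fixed within a boot, which is exactly the property described above.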
The performance overhead has been tested on a 40-core x86 server by comparing the results of `perf bench all` between the kernels with and without this patch, based on the latest linux-next kernel, which shows only minor differences. A subset of the benchmarks is listed below:

                sched/     sched/   syscall/   mem/        mem/
                messaging  pipe     basic      memcpy      memset
                (sec)      (sec)    (sec)      (GB/sec)    (GB/sec)
control1        0.019      5.459    0.733      15.258789   51.398026
control2        0.019      5.439    0.730      16.009221   48.828125
control3        0.019      5.282    0.735      16.009221   48.828125
control_avg     0.019      5.393    0.733      15.759077   49.684759
experiment1     0.019      5.374    0.741      15.500992   46.502976
experiment2     0.019      5.440    0.746      16.276042   51.398026
experiment3     0.019      5.242    0.752      15.258789   51.398026
experiment_avg  0.019      5.352    0.746      15.678608   49.766343

The memory usage overhead was measured by executing `free` after boot on a QEMU VM with 1GB total memory, and as expected, it is positively correlated with the number of cache copies:

            control    4 copies   8 copies   16 copies
total       969.8M     968.2M     968.2M     968.2M
used        20.0M      21.9M      24.1M      26.7M
free        936.9M     933.6M     931.4M     928.6M
available   932.2M     928.8M     926.6M     923.9M

Co-developed-by: Xiu Jianfeng <[email protected]>
Signed-off-by: Xiu Jianfeng <[email protected]>
Signed-off-by: GONG, Ruiqi <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Hyeonggon Yoo <[email protected]>
Acked-by: Dennis Zhou <[email protected]> # percpu
Signed-off-by: Vlastimil Babka <[email protected]>
1 parent 06c2afb · commit 3c61529

File tree

7 files changed: +97 −15 lines changed


include/linux/percpu.h

Lines changed: 9 additions & 3 deletions
@@ -35,14 +35,20 @@
 #define PCPU_BITMAP_BLOCK_BITS         (PCPU_BITMAP_BLOCK_SIZE >>      \
                                         PCPU_MIN_ALLOC_SHIFT)
 
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#define PERCPU_DYNAMIC_SIZE_SHIFT      12
+#else
+#define PERCPU_DYNAMIC_SIZE_SHIFT      10
+#endif
+
 /*
  * Percpu allocator can serve percpu allocations before slab is
  * initialized which allows slab to depend on the percpu allocator.
  * The following parameter decide how much resource to preallocate
  * for this.  Keep PERCPU_DYNAMIC_RESERVE equal to or larger than
  * PERCPU_DYNAMIC_EARLY_SIZE.
  */
-#define PERCPU_DYNAMIC_EARLY_SIZE      (20 << 10)
+#define PERCPU_DYNAMIC_EARLY_SIZE      (20 << PERCPU_DYNAMIC_SIZE_SHIFT)
 
 /*
  * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
@@ -56,9 +62,9 @@
  * intelligent way to determine this would be nice.
  */
 #if BITS_PER_LONG > 32
-#define PERCPU_DYNAMIC_RESERVE         (28 << 10)
+#define PERCPU_DYNAMIC_RESERVE         (28 << PERCPU_DYNAMIC_SIZE_SHIFT)
 #else
-#define PERCPU_DYNAMIC_RESERVE         (20 << 10)
+#define PERCPU_DYNAMIC_RESERVE         (20 << PERCPU_DYNAMIC_SIZE_SHIFT)
 #endif
 
 extern void *pcpu_base_addr;

include/linux/slab.h

Lines changed: 20 additions & 3 deletions
@@ -19,6 +19,7 @@
 #include <linux/workqueue.h>
 #include <linux/percpu-refcount.h>
 #include <linux/cleanup.h>
+#include <linux/hash.h>
 
 
 /*
@@ -345,6 +346,12 @@ static inline unsigned int arch_slab_minalign(void)
 #define SLAB_OBJ_MIN_SIZE      (KMALLOC_MIN_SIZE < 16 ? \
                                (KMALLOC_MIN_SIZE) : 16)
 
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#define RANDOM_KMALLOC_CACHES_NR       15 // # of cache copies
+#else
+#define RANDOM_KMALLOC_CACHES_NR       0
+#endif
+
 /*
  * Whenever changing this, take care of that kmalloc_type() and
  * create_kmalloc_caches() still work as intended.
@@ -361,6 +368,8 @@ enum kmalloc_cache_type {
 #ifndef CONFIG_MEMCG_KMEM
        KMALLOC_CGROUP = KMALLOC_NORMAL,
 #endif
+       KMALLOC_RANDOM_START = KMALLOC_NORMAL,
+       KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR,
 #ifdef CONFIG_SLUB_TINY
        KMALLOC_RECLAIM = KMALLOC_NORMAL,
 #else
@@ -386,14 +395,22 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
        (IS_ENABLED(CONFIG_ZONE_DMA)   ? __GFP_DMA : 0) |       \
        (IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0))
 
-static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
+extern unsigned long random_kmalloc_seed;
+
+static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller)
 {
        /*
         * The most common case is KMALLOC_NORMAL, so test for it
         * with a single branch for all the relevant flags.
         */
        if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0))
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+               /* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */
+               return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed,
+                                                     ilog2(RANDOM_KMALLOC_CACHES_NR + 1));
+#else
                return KMALLOC_NORMAL;
+#endif
 
        /*
         * At least one of the flags has to be set. Their priorities in
@@ -580,7 +597,7 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
 
                index = kmalloc_index(size);
                return kmalloc_trace(
-                               kmalloc_caches[kmalloc_type(flags)][index],
+                               kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
                                flags, size);
        }
        return __kmalloc(size, flags);
@@ -596,7 +613,7 @@ static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t fla
 
                index = kmalloc_index(size);
                return kmalloc_node_trace(
-                               kmalloc_caches[kmalloc_type(flags)][index],
+                               kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
                                flags, node, size);
        }
        return __kmalloc_node(size, flags, node);

mm/Kconfig

Lines changed: 17 additions & 0 deletions
@@ -337,6 +337,23 @@ config SLUB_CPU_PARTIAL
          which requires the taking of locks that may cause latency spikes.
          Typically one would choose no for a realtime system.
 
+config RANDOM_KMALLOC_CACHES
+       default n
+       depends on SLUB && !SLUB_TINY
+       bool "Randomize slab caches for normal kmalloc"
+       help
+         A hardening feature that creates multiple copies of slab caches for
+         normal kmalloc allocation and makes kmalloc randomly pick one based
+         on code address, which makes the attackers more difficult to spray
+         vulnerable memory objects on the heap for the purpose of exploiting
+         memory vulnerabilities.
+
+         Currently the number of copies is set to 16, a reasonably large value
+         that effectively diverges the memory objects allocated for different
+         subsystems or modules into different caches, at the expense of a
+         limited degree of memory and CPU overhead that relates to hardware and
+         system workload.
+
 endmenu # SLAB allocator options
 
 config SHUFFLE_PAGE_ALLOCATOR

mm/kfence/kfence_test.c

Lines changed: 5 additions & 2 deletions
@@ -212,7 +212,9 @@ static void test_cache_destroy(void)
 
 static inline size_t kmalloc_cache_alignment(size_t size)
 {
-       return kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)]->align;
+       /* just to get ->align so no need to pass in the real caller */
+       enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, 0);
+       return kmalloc_caches[type][__kmalloc_index(size, false)]->align;
 }
 
 /* Must always inline to match stack trace against caller. */
@@ -282,8 +284,9 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
 
        if (is_kfence_address(alloc)) {
                struct slab *slab = virt_to_slab(alloc);
+               enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, _RET_IP_);
                struct kmem_cache *s = test_cache ?:
-                       kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)];
+                       kmalloc_caches[type][__kmalloc_index(size, false)];
 
                /*
                 * Verify that various helpers return the right values

mm/slab.c

Lines changed: 1 addition & 1 deletion
@@ -1670,7 +1670,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
                if (freelist_size > KMALLOC_MAX_CACHE_SIZE) {
                        freelist_cache_size = PAGE_SIZE << get_order(freelist_size);
                } else {
-                       freelist_cache = kmalloc_slab(freelist_size, 0u);
+                       freelist_cache = kmalloc_slab(freelist_size, 0u, _RET_IP_);
                        if (!freelist_cache)
                                continue;
                        freelist_cache_size = freelist_cache->size;

mm/slab.h

Lines changed: 1 addition & 1 deletion
@@ -282,7 +282,7 @@ void setup_kmalloc_cache_index_table(void);
 void create_kmalloc_caches(slab_flags_t);
 
 /* Find the kmalloc slab corresponding for a certain size */
-struct kmem_cache *kmalloc_slab(size_t, gfp_t);
+struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller);
 
 void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
                              int node, size_t orig_size,

mm/slab_common.c

Lines changed: 44 additions & 5 deletions
@@ -678,6 +678,11 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
 { /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ };
 EXPORT_SYMBOL(kmalloc_caches);
 
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+unsigned long random_kmalloc_seed __ro_after_init;
+EXPORT_SYMBOL(random_kmalloc_seed);
+#endif
+
 /*
  * Conversion table for small slabs sizes / 8 to the index in the
  * kmalloc array. This is necessary for slabs < 192 since we have non power
@@ -720,7 +725,7 @@ static inline unsigned int size_index_elem(unsigned int bytes)
  * Find the kmem_cache structure that serves a given size of
  * allocation
  */
-struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
+struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
 {
        unsigned int index;
 
@@ -735,7 +740,7 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
                index = fls(size - 1);
        }
 
-       return kmalloc_caches[kmalloc_type(flags)][index];
+       return kmalloc_caches[kmalloc_type(flags, caller)][index];
 }
 
 size_t kmalloc_size_roundup(size_t size)
@@ -752,8 +757,11 @@ size_t kmalloc_size_roundup(size_t size)
        if (size > KMALLOC_MAX_CACHE_SIZE)
                return PAGE_SIZE << get_order(size);
 
-       /* The flags don't matter since size_index is common to all. */
-       c = kmalloc_slab(size, GFP_KERNEL);
+       /*
+        * The flags don't matter since size_index is common to all.
+        * Neither does the caller for just getting ->object_size.
+        */
+       c = kmalloc_slab(size, GFP_KERNEL, 0);
        return c ? c->object_size : 0;
 }
 EXPORT_SYMBOL(kmalloc_size_roundup);
@@ -776,12 +784,35 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
 #define KMALLOC_RCL_NAME(sz)
 #endif
 
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#define __KMALLOC_RANDOM_CONCAT(a, b) a ## b
+#define KMALLOC_RANDOM_NAME(N, sz) __KMALLOC_RANDOM_CONCAT(KMA_RAND_, N)(sz)
+#define KMA_RAND_1(sz)                  .name[KMALLOC_RANDOM_START +  1] = "kmalloc-rnd-01-" #sz,
+#define KMA_RAND_2(sz)  KMA_RAND_1(sz)  .name[KMALLOC_RANDOM_START +  2] = "kmalloc-rnd-02-" #sz,
+#define KMA_RAND_3(sz)  KMA_RAND_2(sz)  .name[KMALLOC_RANDOM_START +  3] = "kmalloc-rnd-03-" #sz,
+#define KMA_RAND_4(sz)  KMA_RAND_3(sz)  .name[KMALLOC_RANDOM_START +  4] = "kmalloc-rnd-04-" #sz,
+#define KMA_RAND_5(sz)  KMA_RAND_4(sz)  .name[KMALLOC_RANDOM_START +  5] = "kmalloc-rnd-05-" #sz,
+#define KMA_RAND_6(sz)  KMA_RAND_5(sz)  .name[KMALLOC_RANDOM_START +  6] = "kmalloc-rnd-06-" #sz,
+#define KMA_RAND_7(sz)  KMA_RAND_6(sz)  .name[KMALLOC_RANDOM_START +  7] = "kmalloc-rnd-07-" #sz,
+#define KMA_RAND_8(sz)  KMA_RAND_7(sz)  .name[KMALLOC_RANDOM_START +  8] = "kmalloc-rnd-08-" #sz,
+#define KMA_RAND_9(sz)  KMA_RAND_8(sz)  .name[KMALLOC_RANDOM_START +  9] = "kmalloc-rnd-09-" #sz,
+#define KMA_RAND_10(sz) KMA_RAND_9(sz)  .name[KMALLOC_RANDOM_START + 10] = "kmalloc-rnd-10-" #sz,
+#define KMA_RAND_11(sz) KMA_RAND_10(sz) .name[KMALLOC_RANDOM_START + 11] = "kmalloc-rnd-11-" #sz,
+#define KMA_RAND_12(sz) KMA_RAND_11(sz) .name[KMALLOC_RANDOM_START + 12] = "kmalloc-rnd-12-" #sz,
+#define KMA_RAND_13(sz) KMA_RAND_12(sz) .name[KMALLOC_RANDOM_START + 13] = "kmalloc-rnd-13-" #sz,
+#define KMA_RAND_14(sz) KMA_RAND_13(sz) .name[KMALLOC_RANDOM_START + 14] = "kmalloc-rnd-14-" #sz,
+#define KMA_RAND_15(sz) KMA_RAND_14(sz) .name[KMALLOC_RANDOM_START + 15] = "kmalloc-rnd-15-" #sz,
+#else // CONFIG_RANDOM_KMALLOC_CACHES
+#define KMALLOC_RANDOM_NAME(N, sz)
+#endif
+
 #define INIT_KMALLOC_INFO(__size, __short_size)                        \
 {                                                              \
        .name[KMALLOC_NORMAL]  = "kmalloc-" #__short_size,      \
        KMALLOC_RCL_NAME(__short_size)                          \
        KMALLOC_CGROUP_NAME(__short_size)                       \
        KMALLOC_DMA_NAME(__short_size)                          \
+       KMALLOC_RANDOM_NAME(RANDOM_KMALLOC_CACHES_NR, __short_size)    \
        .size = __size,                                         \
 }
 
@@ -890,6 +921,11 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
                flags |= SLAB_CACHE_DMA;
        }
 
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+       if (type >= KMALLOC_RANDOM_START && type <= KMALLOC_RANDOM_END)
+               flags |= SLAB_NO_MERGE;
+#endif
+
        /*
         * If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
         * KMALLOC_NORMAL caches.
@@ -941,6 +977,9 @@ void __init create_kmalloc_caches(slab_flags_t flags)
                        new_kmalloc_cache(2, type, flags);
                }
        }
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+       random_kmalloc_seed = get_random_u64();
+#endif
 
        /* Kmalloc array is now usable */
        slab_state = UP;
@@ -976,7 +1015,7 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller
                return ret;
        }
 
-       s = kmalloc_slab(size, flags);
+       s = kmalloc_slab(size, flags, caller);
 
        if (unlikely(ZERO_OR_NULL_PTR(s)))
                return s;
