
Commit b53b63d

Author: Alexei Starovoitov (committed)
Merge branch 'support-eliding-map-lookup-nullness'
Daniel Xu says:

====================
Support eliding map lookup nullness

This patch allows progs to elide a null check on statically known map
lookup keys. In other words, if the verifier can statically prove that the
lookup will be in-bounds, allow the prog to drop the null check.

This is useful for two reasons:

1. Large numbers of nullness checks (especially when they cannot fail)
   unnecessarily push the prog towards BPF_COMPLEXITY_LIMIT_JMP_SEQ.
2. It forms a tighter contract between programmer and verifier.

For (1), bpftrace is starting to make heavier use of percpu scratch maps.
As a result, for user scripts with a large number of unrolled loops, we are
starting to hit jump complexity verification errors. These percpu lookups
cannot fail anyways, as we only use static key values. Eliding nullness
probably results in less work for the verifier as well.

For (2), percpu scratch maps are often used as a larger stack, as the
current stack is limited to 512 bytes. In these situations, it is desirable
for the programmer to express: "this lookup should never fail, and if it
does, it means I messed up the code". By omitting the null check, the
programmer can "ask" the verifier to double check the logic.

=== Changelog ===

Changes in v7:
* Use more accurate frame number when marking precise
* Add test for non-stack key
* Test for marking stack slot precise

Changes in v6:
* Use is_spilled_scalar_reg() helper and remove unnecessary comment
* Add back deleted selftest with different helper to dirty dst buffer
* Check size of spill is exactly key_size and update selftests
* Read slot_type from correct offset into the spi
* Rewrite selftests in C where possible
* Mark constant map keys as precise

Changes in v5:
* Dropped all acks
* Use s64 instead of long for const_map_key
* Ensure stack slot contains spilled reg before accessing spilled_ptr
* Ensure spilled reg is a scalar before accessing tnum const value
* Fix verifier selftest for 32-bit write to write at 8 byte alignment to
  ensure spill is tracked
* Introduce more precise tracking of helper stack accesses
* Do constant map key extraction as part of helper argument processing and
  then remove duplicated stack checks
* Use ret_flag instead of regs[BPF_REG_0].type
* Handle STACK_ZERO
* Fix bug in bpf_load_hdr_opt() arg annotation

Changes in v4:
* Only allow for CAP_BPF
* Add test for stack growing upwards
* Improve comment about stack growing upwards

Changes in v3:
* Check if stack is (erroneously) growing upwards
* Mention in commit message why existing tests needed change

Changes in v2:
* Added a check for when R2 is not a ptr to stack
* Added a check for when stack is uninitialized (no stack slot yet)
* Updated existing tests to account for null elision
* Added test case for when R2 can be both const and non-const
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 556a399 + f932a8e commit b53b63d

19 files changed (+331 −69 lines)
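
For illustration only (not taken from this series' patches): a minimal sketch of the kind of program the elision enables, assuming a percpu array used as a scratch map and a statically known, in-bounds key. The map and program names here are hypothetical; without CAP_BPF, with a non-constant key, or with a key >= max_entries, the usual null check is still required.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} scratch SEC(".maps");

SEC("raw_tp")
int use_scratch(void *ctx)
{
	__u32 key = 0;	/* constant and < max_entries */
	__u64 *val;

	/* With nullness elision, the verifier drops PTR_MAYBE_NULL from the
	 * return value of this constant-key array lookup, so the usual
	 * "if (!val) return 0;" can be omitted.
	 */
	val = bpf_map_lookup_elem(&scratch, &key);
	*val = 42;

	return 0;
}

char LICENSE[] SEC("license") = "GPL";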

kernel/bpf/verifier.c

+106 −33

@@ -287,6 +287,7 @@ struct bpf_call_arg_meta {
 	u32 ret_btf_id;
 	u32 subprogno;
 	struct btf_field *kptr_field;
+	s64 const_map_key;
 };
 
 struct bpf_kfunc_call_arg_meta {
@@ -5303,7 +5304,7 @@ enum bpf_access_src {
 static int check_stack_range_initialized(struct bpf_verifier_env *env,
 					 int regno, int off, int access_size,
 					 bool zero_size_allowed,
-					 enum bpf_access_src type,
+					 enum bpf_access_type type,
 					 struct bpf_call_arg_meta *meta);
 
 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
@@ -5336,7 +5337,7 @@ static int check_stack_read_var_off(struct bpf_verifier_env *env,
 	/* Note that we pass a NULL meta, so raw access will not be permitted.
 	 */
 	err = check_stack_range_initialized(env, ptr_regno, off, size,
-					    false, ACCESS_DIRECT, NULL);
+					    false, BPF_READ, NULL);
 	if (err)
 		return err;
 
@@ -7190,7 +7191,7 @@ static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
 static int check_stack_access_within_bounds(
 		struct bpf_verifier_env *env,
 		int regno, int off, int access_size,
-		enum bpf_access_src src, enum bpf_access_type type)
+		enum bpf_access_type type)
 {
 	struct bpf_reg_state *regs = cur_regs(env);
 	struct bpf_reg_state *reg = regs + regno;
@@ -7199,10 +7200,7 @@ static int check_stack_access_within_bounds(
 	int err;
 	char *err_extra;
 
-	if (src == ACCESS_HELPER)
-		/* We don't know if helpers are reading or writing (or both). */
-		err_extra = " indirect access to";
-	else if (type == BPF_READ)
+	if (type == BPF_READ)
 		err_extra = " read from";
 	else
 		err_extra = " write to";
@@ -7420,7 +7418,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 
 	} else if (reg->type == PTR_TO_STACK) {
 		/* Basic bounds checks. */
-		err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
+		err = check_stack_access_within_bounds(env, regno, off, size, t);
 		if (err)
 			return err;
 
@@ -7640,13 +7638,11 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
 static int check_stack_range_initialized(
 		struct bpf_verifier_env *env, int regno, int off,
 		int access_size, bool zero_size_allowed,
-		enum bpf_access_src type, struct bpf_call_arg_meta *meta)
+		enum bpf_access_type type, struct bpf_call_arg_meta *meta)
 {
 	struct bpf_reg_state *reg = reg_state(env, regno);
 	struct bpf_func_state *state = func(env, reg);
 	int err, min_off, max_off, i, j, slot, spi;
-	char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
-	enum bpf_access_type bounds_check_type;
 	/* Some accesses can write anything into the stack, others are
 	 * read-only.
 	 */
@@ -7657,18 +7653,10 @@ static int check_stack_range_initialized(
 		return -EACCES;
 	}
 
-	if (type == ACCESS_HELPER) {
-		/* The bounds checks for writes are more permissive than for
-		 * reads. However, if raw_mode is not set, we'll do extra
-		 * checks below.
-		 */
-		bounds_check_type = BPF_WRITE;
+	if (type == BPF_WRITE)
 		clobber = true;
-	} else {
-		bounds_check_type = BPF_READ;
-	}
-	err = check_stack_access_within_bounds(env, regno, off, access_size,
-					       type, bounds_check_type);
+
+	err = check_stack_access_within_bounds(env, regno, off, access_size, type);
 	if (err)
 		return err;
 
@@ -7685,8 +7673,8 @@ static int check_stack_range_initialized(
 			char tn_buf[48];
 
 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
-			verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
-				regno, err_extra, tn_buf);
+			verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
+				regno, tn_buf);
 			return -EACCES;
 		}
 		/* Only initialized buffer on stack is allowed to be accessed
@@ -7739,7 +7727,7 @@ static int check_stack_range_initialized(
 		slot = -i - 1;
 		spi = slot / BPF_REG_SIZE;
 		if (state->allocated_stack <= slot) {
-			verbose(env, "verifier bug: allocated_stack too small");
+			verbose(env, "verifier bug: allocated_stack too small\n");
 			return -EFAULT;
 		}
 
@@ -7767,14 +7755,14 @@ static int check_stack_range_initialized(
 		}
 
 		if (tnum_is_const(reg->var_off)) {
-			verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
-				err_extra, regno, min_off, i - min_off, access_size);
+			verbose(env, "invalid read from stack R%d off %d+%d size %d\n",
+				regno, min_off, i - min_off, access_size);
 		} else {
 			char tn_buf[48];
 
 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
-			verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
-				err_extra, regno, tn_buf, i - min_off, access_size);
+			verbose(env, "invalid read from stack R%d var_off %s+%d size %d\n",
+				regno, tn_buf, i - min_off, access_size);
 		}
 		return -EACCES;
 mark:
@@ -7849,7 +7837,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 		return check_stack_range_initialized(
 				env,
 				regno, reg->off, access_size,
-				zero_size_allowed, ACCESS_HELPER, meta);
+				zero_size_allowed, access_type, meta);
 	case PTR_TO_BTF_ID:
 		return check_ptr_to_btf_access(env, regs, regno, reg->off,
 					       access_size, BPF_READ, -1);
@@ -9161,6 +9149,63 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
 	return 0;
 }
 
+/* Returns constant key value if possible, else negative error */
+static s64 get_constant_map_key(struct bpf_verifier_env *env,
+				struct bpf_reg_state *key,
+				u32 key_size)
+{
+	struct bpf_func_state *state = func(env, key);
+	struct bpf_reg_state *reg;
+	int slot, spi, off;
+	int spill_size = 0;
+	int zero_size = 0;
+	int stack_off;
+	int i, err;
+	u8 *stype;
+
+	if (!env->bpf_capable)
+		return -EOPNOTSUPP;
+	if (key->type != PTR_TO_STACK)
+		return -EOPNOTSUPP;
+	if (!tnum_is_const(key->var_off))
+		return -EOPNOTSUPP;
+
+	stack_off = key->off + key->var_off.value;
+	slot = -stack_off - 1;
+	spi = slot / BPF_REG_SIZE;
+	off = slot % BPF_REG_SIZE;
+	stype = state->stack[spi].slot_type;
+
+	/* First handle precisely tracked STACK_ZERO */
+	for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--)
+		zero_size++;
+	if (zero_size >= key_size)
+		return 0;
+
+	/* Check that stack contains a scalar spill of expected size */
+	if (!is_spilled_scalar_reg(&state->stack[spi]))
+		return -EOPNOTSUPP;
+	for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--)
+		spill_size++;
+	if (spill_size != key_size)
+		return -EOPNOTSUPP;
+
+	reg = &state->stack[spi].spilled_ptr;
+	if (!tnum_is_const(reg->var_off))
+		/* Stack value not statically known */
+		return -EOPNOTSUPP;
+
+	/* We are relying on a constant value. So mark as precise
+	 * to prevent pruning on it.
+	 */
+	bt_set_frame_slot(&env->bt, key->frameno, spi);
+	err = mark_chain_precision_batch(env);
+	if (err < 0)
+		return err;
+
+	return reg->var_off.value;
+}
+
 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 			  struct bpf_call_arg_meta *meta,
 			  const struct bpf_func_proto *fn,
@@ -9171,6 +9216,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 	enum bpf_arg_type arg_type = fn->arg_type[arg];
 	enum bpf_reg_type type = reg->type;
 	u32 *arg_btf_id = NULL;
+	u32 key_size;
 	int err = 0;
 
 	if (arg_type == ARG_DONTCARE)
@@ -9304,8 +9350,13 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 			verbose(env, "invalid map_ptr to access map->key\n");
 			return -EACCES;
 		}
-		err = check_helper_mem_access(env, regno, meta->map_ptr->key_size,
-					      BPF_READ, false, NULL);
+		key_size = meta->map_ptr->key_size;
+		err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
+		if (err)
+			return err;
+		meta->const_map_key = get_constant_map_key(env, reg, key_size);
+		if (meta->const_map_key < 0 && meta->const_map_key != -EOPNOTSUPP)
+			return meta->const_map_key;
 		break;
 	case ARG_PTR_TO_MAP_VALUE:
 		if (type_may_be_null(arg_type) && register_is_null(reg))
@@ -10829,6 +10880,21 @@ static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno
 		 state->callback_subprogno == subprogno);
 }
 
+/* Returns whether or not the given map type can potentially elide
+ * lookup return value nullness check. This is possible if the key
+ * is statically known.
+ */
+static bool can_elide_value_nullness(enum bpf_map_type type)
+{
+	switch (type) {
+	case BPF_MAP_TYPE_ARRAY:
+	case BPF_MAP_TYPE_PERCPU_ARRAY:
+		return true;
+	default:
+		return false;
+	}
+}
+
 static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
 			    const struct bpf_func_proto **ptr)
 {
@@ -11195,10 +11261,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 				"kernel subsystem misconfigured verifier\n");
 			return -EINVAL;
 		}
+
+		if (func_id == BPF_FUNC_map_lookup_elem &&
+		    can_elide_value_nullness(meta.map_ptr->map_type) &&
+		    meta.const_map_key >= 0 &&
+		    meta.const_map_key < meta.map_ptr->max_entries)
+			ret_flag &= ~PTR_MAYBE_NULL;
+
 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
 		regs[BPF_REG_0].map_uid = meta.map_uid;
 		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
-		if (!type_may_be_null(ret_type) &&
+		if (!type_may_be_null(ret_flag) &&
 		    btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
 			regs[BPF_REG_0].id = ++env->id_gen;
 		}
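
Reading the check_helper_call() hunk above: nullness is elided only when all four conditions hold, namely the helper is bpf_map_lookup_elem(), the map type passes can_elide_value_nullness() (ARRAY or PERCPU_ARRAY), a constant key was recovered from the stack, and that key is below max_entries. A hypothetical counter-example (reusing the scratch map from the sketch near the top, not taken from this series) where the check remains mandatory:

SEC("raw_tp")
int variable_key(void *ctx)
{
	__u32 key = bpf_get_smp_processor_id();	/* not statically known */
	__u64 *val;

	val = bpf_map_lookup_elem(&scratch, &key);
	if (!val)	/* get_constant_map_key() returns -EOPNOTSUPP for a
			 * non-constant key, so PTR_MAYBE_NULL stays set and
			 * this check is still required.
			 */
		return 0;

	*val += 1;
	return 0;
}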

net/core/filter.c

+1 −1

@@ -7643,7 +7643,7 @@ static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_PTR_TO_MEM | MEM_WRITE,
 	.arg3_type	= ARG_CONST_SIZE,
 	.arg4_type	= ARG_ANYTHING,
 };

tools/testing/selftests/bpf/progs/dynptr_fail.c

+3 −3

@@ -192,7 +192,7 @@ int ringbuf_invalid_api(void *ctx)
 
 /* Can't add a dynptr to a map */
 SEC("?raw_tp")
-__failure __msg("invalid indirect read from stack")
+__failure __msg("invalid read from stack")
 int add_dynptr_to_map1(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -210,7 +210,7 @@ int add_dynptr_to_map1(void *ctx)
 
 /* Can't add a struct with an embedded dynptr to a map */
 SEC("?raw_tp")
-__failure __msg("invalid indirect read from stack")
+__failure __msg("invalid read from stack")
 int add_dynptr_to_map2(void *ctx)
 {
 	struct test_info x;
@@ -398,7 +398,7 @@ int data_slice_missing_null_check2(void *ctx)
  * dynptr argument
  */
 SEC("?raw_tp")
-__failure __msg("invalid indirect read from stack")
+__failure __msg("invalid read from stack")
 int invalid_helper1(void *ctx)
 {
 	struct bpf_dynptr ptr;

tools/testing/selftests/bpf/progs/iters.c

+7 −7

@@ -524,11 +524,11 @@ int iter_subprog_iters(const void *ctx)
 }
 
 struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(type, BPF_MAP_TYPE_HASH);
 	__type(key, int);
 	__type(value, int);
 	__uint(max_entries, 1000);
-} arr_map SEC(".maps");
+} hash_map SEC(".maps");
 
 SEC("?raw_tp")
 __failure __msg("invalid mem access 'scalar'")
@@ -539,7 +539,7 @@ int iter_err_too_permissive1(const void *ctx)
 
 	MY_PID_GUARD();
 
-	map_val = bpf_map_lookup_elem(&arr_map, &key);
+	map_val = bpf_map_lookup_elem(&hash_map, &key);
 	if (!map_val)
 		return 0;
 
@@ -561,12 +561,12 @@ int iter_err_too_permissive2(const void *ctx)
 
 	MY_PID_GUARD();
 
-	map_val = bpf_map_lookup_elem(&arr_map, &key);
+	map_val = bpf_map_lookup_elem(&hash_map, &key);
 	if (!map_val)
 		return 0;
 
 	bpf_repeat(1000000) {
-		map_val = bpf_map_lookup_elem(&arr_map, &key);
+		map_val = bpf_map_lookup_elem(&hash_map, &key);
 	}
 
 	*map_val = 123;
@@ -585,7 +585,7 @@ int iter_err_too_permissive3(const void *ctx)
 	MY_PID_GUARD();
 
 	bpf_repeat(1000000) {
-		map_val = bpf_map_lookup_elem(&arr_map, &key);
+		map_val = bpf_map_lookup_elem(&hash_map, &key);
 		found = true;
 	}
 
@@ -606,7 +606,7 @@ int iter_tricky_but_fine(const void *ctx)
 	MY_PID_GUARD();
 
 	bpf_repeat(1000000) {
-		map_val = bpf_map_lookup_elem(&arr_map, &key);
+		map_val = bpf_map_lookup_elem(&hash_map, &key);
 		if (map_val) {
 			found = true;
 			break;

tools/testing/selftests/bpf/progs/map_kptr_fail.c

+1 −1

@@ -345,7 +345,7 @@ int reject_indirect_global_func_access(struct __sk_buff *ctx)
 }
 
 SEC("?tc")
-__failure __msg("Unreleased reference id=5 alloc_insn=")
+__failure __msg("Unreleased reference id=4 alloc_insn=")
 int kptr_xchg_ref_state(struct __sk_buff *ctx)
 {
 	struct prog_test_ref_kfunc *p;

tools/testing/selftests/bpf/progs/test_global_func10.c

+1 −1

@@ -26,7 +26,7 @@ __noinline int foo(const struct Big *big)
 }
 
 SEC("cgroup_skb/ingress")
-__failure __msg("invalid indirect access to stack")
+__failure __msg("invalid read from stack")
 int global_func10(struct __sk_buff *skb)
 {
 	const struct Small small = {.x = skb->len };

tools/testing/selftests/bpf/progs/uninit_stack.c

+3 −2

@@ -70,7 +70,8 @@ __naked int helper_uninit_to_misc(void *ctx)
 	r1 = r10;					\
 	r1 += -128;					\
 	r2 = 32;					\
-	call %[bpf_trace_printk];			\
+	r3 = 0;						\
+	call %[bpf_probe_read_user];			\
 	/* Call to dummy() forces print_verifier_state(..., true),	\
 	 * thus showing the stack state, matched by __msg().		\
 	 */						\
@@ -79,7 +80,7 @@ __naked int helper_uninit_to_misc(void *ctx)
 	exit;						\
 "
 	:
-	: __imm(bpf_trace_printk),
+	: __imm(bpf_probe_read_user),
 	  __imm(dummy)
 	: __clobber_all);
 }
