Skip to content

Commit 41c48f3

Browse files
rdnaborkmann
authored and committed
bpf: Support access to bpf map fields
There are multiple use-cases when it's convenient to have access to bpf map fields, both `struct bpf_map` and map type specific struct-s such as `struct bpf_array`, `struct bpf_htab`, etc. For example while working with sock arrays it can be necessary to calculate the key based on map->max_entries (some_hash % max_entries). Currently this is solved by communicating max_entries via "out-of-band" channel, e.g. via additional map with known key to get info about target map. That works, but is not very convenient and error-prone while working with many maps. In other cases necessary data is dynamic (i.e. unknown at loading time) and it's impossible to get it at all. For example while working with a hash table it can be convenient to know how much capacity is already used (bpf_htab.count.counter for BPF_F_NO_PREALLOC case). At the same time kernel knows this info and can provide it to bpf program. Fill this gap by adding support to access bpf map fields from bpf program for both `struct bpf_map` and map type specific fields. Support is implemented via btf_struct_access() so that a user can define their own `struct bpf_map` or map type specific struct in their program with only necessary fields and preserve_access_index attribute, cast a map to this struct and use a field. For example: struct bpf_map { __u32 max_entries; } __attribute__((preserve_access_index)); struct bpf_array { struct bpf_map map; __u32 elem_size; } __attribute__((preserve_access_index)); struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 4); __type(key, __u32); __type(value, __u32); } m_array SEC(".maps"); SEC("cgroup_skb/egress") int cg_skb(void *ctx) { struct bpf_array *array = (struct bpf_array *)&m_array; struct bpf_map *map = (struct bpf_map *)&m_array; /* .. use map->max_entries or array->map.max_entries .. */ } Similarly to other btf_struct_access() use-cases (e.g. struct tcp_sock in net/ipv4/bpf_tcp_ca.c) the patch allows access to any fields of corresponding struct. 
Only reading from map fields is supported. For btf_struct_access() to work there should be a way to know btf id of a struct that corresponds to a map type. To get btf id there should be a way to get a stringified name of map-specific struct, such as "bpf_array", "bpf_htab", etc for a map type. Two new fields are added to `struct bpf_map_ops` to handle it: * .map_btf_name keeps a btf name of a struct returned by map_alloc(); * .map_btf_id is used to cache btf id of that struct. To make btf ids calculation cheaper they're calculated once while preparing btf_vmlinux and cached same way as it's done for btf_id field of `struct bpf_func_proto` While calculating btf ids, struct names are NOT checked for collision. Collisions will be checked as a part of the work to prepare btf ids used in verifier in compile time that should land soon. The only known collision for `struct bpf_htab` (kernel/bpf/hashtab.c vs net/core/sock_map.c) was fixed earlier. Both new fields .map_btf_name and .map_btf_id must be set for a map type for the feature to work. If neither is set for a map type, verifier will return ENOTSUPP on a try to access map_ptr of corresponding type. If just one of them set, it's verifier misconfiguration. Only `struct bpf_array` for BPF_MAP_TYPE_ARRAY and `struct bpf_htab` for BPF_MAP_TYPE_HASH are supported by this patch. Other map types will be supported separately. The feature is available only for CONFIG_DEBUG_INFO_BTF=y and gated by perfmon_capable() so that unpriv programs won't have access to bpf map fields. Signed-off-by: Andrey Ignatov <[email protected]> Signed-off-by: Daniel Borkmann <[email protected]> Acked-by: John Fastabend <[email protected]> Acked-by: Martin KaFai Lau <[email protected]> Link: https://lore.kernel.org/bpf/6479686a0cd1e9067993df57b4c3eef0e276fec9.1592600985.git.rdna@fb.com
1 parent 032a6b3 commit 41c48f3

File tree

7 files changed

+131
-9
lines changed

7 files changed

+131
-9
lines changed

include/linux/bpf.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@ struct bpf_map_ops {
9292
int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma);
9393
__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
9494
struct poll_table_struct *pts);
95+
96+
/* BTF name and id of struct allocated by map_alloc */
97+
const char * const map_btf_name;
98+
int *map_btf_id;
9599
};
96100

97101
struct bpf_map_memory {
@@ -1109,6 +1113,11 @@ static inline bool bpf_allow_ptr_leaks(void)
11091113
return perfmon_capable();
11101114
}
11111115

1116+
static inline bool bpf_allow_ptr_to_map_access(void)
1117+
{
1118+
return perfmon_capable();
1119+
}
1120+
11121121
static inline bool bpf_bypass_spec_v1(void)
11131122
{
11141123
return perfmon_capable();

include/linux/bpf_verifier.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ struct bpf_verifier_env {
379379
u32 used_map_cnt; /* number of used maps */
380380
u32 id_gen; /* used to generate unique reg IDs */
381381
bool allow_ptr_leaks;
382+
bool allow_ptr_to_map_access;
382383
bool bpf_capable;
383384
bool bypass_spec_v1;
384385
bool bypass_spec_v4;

kernel/bpf/arraymap.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,7 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
494494
vma->vm_pgoff + pgoff);
495495
}
496496

497+
static int array_map_btf_id;
497498
const struct bpf_map_ops array_map_ops = {
498499
.map_alloc_check = array_map_alloc_check,
499500
.map_alloc = array_map_alloc,
@@ -510,6 +511,8 @@ const struct bpf_map_ops array_map_ops = {
510511
.map_check_btf = array_map_check_btf,
511512
.map_lookup_batch = generic_map_lookup_batch,
512513
.map_update_batch = generic_map_update_batch,
514+
.map_btf_name = "bpf_array",
515+
.map_btf_id = &array_map_btf_id,
513516
};
514517

515518
const struct bpf_map_ops percpu_array_map_ops = {

kernel/bpf/btf.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3571,6 +3571,41 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
35713571
return ctx_type;
35723572
}
35733573

3574+
/* Table of map ops indexed by map type, generated from bpf_types.h.
 * Used only to resolve and cache the BTF id of the struct returned by
 * each map type's map_alloc() (see btf_vmlinux_map_ids_init() below).
 * BPF_PROG_TYPE and BPF_LINK_TYPE expand to nothing so that only the
 * BPF_MAP_TYPE entries contribute array elements.
 */
static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_LINK_TYPE(_id, _name)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_LINK_TYPE
#undef BPF_MAP_TYPE
};
3584+
3585+
/* Resolve and cache the BTF ids of map-type-specific structs (e.g.
 * "bpf_array", "bpf_htab") for every map type that announces one via
 * .map_btf_name/.map_btf_id in its bpf_map_ops.  Runs once while
 * preparing btf_vmlinux; the verifier later reads the cached id when
 * checking direct access through a CONST_PTR_TO_MAP register.
 *
 * Returns 0 on success, -EINVAL on a misconfigured map type, or the
 * negative error from btf_find_by_name_kind() on lookup failure.
 * NOTE(review): struct names are not checked for collisions here —
 * the commit message defers that to later compile-time btf-id work.
 */
static int btf_vmlinux_map_ids_init(const struct btf *btf,
				    struct bpf_verifier_log *log)
{
	const struct bpf_map_ops *ops;
	int i, btf_id;

	for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) {
		ops = btf_vmlinux_map_ops[i];
		/* Map types that opt out entirely (neither field set) are
		 * simply skipped — they just won't support map_ptr access.
		 */
		if (!ops || (!ops->map_btf_name && !ops->map_btf_id))
			continue;
		/* Setting only one of the two fields is a kernel bug. */
		if (!ops->map_btf_name || !ops->map_btf_id) {
			bpf_log(log, "map type %d is misconfigured\n", i);
			return -EINVAL;
		}
		btf_id = btf_find_by_name_kind(btf, ops->map_btf_name,
					       BTF_KIND_STRUCT);
		if (btf_id < 0)
			return btf_id;
		/* Cache the id so the verifier doesn't repeat the lookup. */
		*ops->map_btf_id = btf_id;
	}

	return 0;
}
3608+
35743609
static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
35753610
struct btf *btf,
35763611
const struct btf_type *t,
@@ -3633,6 +3668,11 @@ struct btf *btf_parse_vmlinux(void)
36333668
/* btf_parse_vmlinux() runs under bpf_verifier_lock */
36343669
bpf_ctx_convert.t = btf_type_by_id(btf, btf_id);
36353670

3671+
/* find bpf map structs for map_ptr access checking */
3672+
err = btf_vmlinux_map_ids_init(btf, log);
3673+
if (err < 0)
3674+
goto errout;
3675+
36363676
bpf_struct_ops_init(btf, log);
36373677

36383678
btf_verifier_env_free(env);

kernel/bpf/hashtab.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1614,6 +1614,7 @@ htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
16141614
true, false);
16151615
}
16161616

1617+
static int htab_map_btf_id;
16171618
const struct bpf_map_ops htab_map_ops = {
16181619
.map_alloc_check = htab_map_alloc_check,
16191620
.map_alloc = htab_map_alloc,
@@ -1625,6 +1626,8 @@ const struct bpf_map_ops htab_map_ops = {
16251626
.map_gen_lookup = htab_map_gen_lookup,
16261627
.map_seq_show_elem = htab_map_seq_show_elem,
16271628
BATCH_OPS(htab),
1629+
.map_btf_name = "bpf_htab",
1630+
.map_btf_id = &htab_map_btf_id,
16281631
};
16291632

16301633
const struct bpf_map_ops htab_lru_map_ops = {

kernel/bpf/verifier.c

Lines changed: 74 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,6 +1351,19 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
13511351
__mark_reg_not_init(env, regs + regno);
13521352
}
13531353

1354+
/* Mark the destination register of a BTF-typed load: a pointer field
 * becomes a known-zero-offset PTR_TO_BTF_ID carrying btf_id, while a
 * scalar field becomes an unknown SCALAR_VALUE.  Shared by
 * check_ptr_to_btf_access() and check_ptr_to_map_access().
 */
static void mark_btf_ld_reg(struct bpf_verifier_env *env,
			    struct bpf_reg_state *regs, u32 regno,
			    enum bpf_reg_type reg_type, u32 btf_id)
{
	if (reg_type != SCALAR_VALUE) {
		mark_reg_known_zero(env, regs, regno);
		regs[regno].type = PTR_TO_BTF_ID;
		regs[regno].btf_id = btf_id;
		return;
	}
	mark_reg_unknown(env, regs, regno);
}
1366+
13541367
#define DEF_NOT_SUBREG (0)
13551368
static void init_reg_state(struct bpf_verifier_env *env,
13561369
struct bpf_func_state *state)
@@ -3182,19 +3195,68 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
31823195
if (ret < 0)
31833196
return ret;
31843197

3185-
if (atype == BPF_READ && value_regno >= 0) {
3186-
if (ret == SCALAR_VALUE) {
3187-
mark_reg_unknown(env, regs, value_regno);
3188-
return 0;
3189-
}
3190-
mark_reg_known_zero(env, regs, value_regno);
3191-
regs[value_regno].type = PTR_TO_BTF_ID;
3192-
regs[value_regno].btf_id = btf_id;
3198+
if (atype == BPF_READ && value_regno >= 0)
3199+
mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3200+
3201+
return 0;
3202+
}
3203+
3204+
/* Validate a direct memory access through a CONST_PTR_TO_MAP register,
 * i.e. a read of `struct bpf_map` / map-type-specific struct fields
 * from a bpf program.  On success the value register (if any) is marked
 * via mark_btf_ld_reg() with the field's type info.
 *
 * The order of the checks below is load-bearing: each failure path
 * emits a distinct verifier message that selftests match against
 * (e.g. "only read from %s is supported").
 */
static int check_ptr_to_map_access(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs,
				   int regno, int off, int size,
				   enum bpf_access_type atype,
				   int value_regno)
{
	struct bpf_reg_state *reg = regs + regno;
	struct bpf_map *map = reg->map_ptr;
	const struct btf_type *t;
	const char *tname;
	u32 btf_id;
	int ret;

	/* Feature depends on btf_vmlinux, i.e. CONFIG_DEBUG_INFO_BTF=y. */
	if (!btf_vmlinux) {
		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
		return -ENOTSUPP;
	}

	/* Map type didn't register a struct id (see btf_vmlinux_map_ids_init). */
	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
		verbose(env, "map_ptr access not supported for map type %d\n",
			map->map_type);
		return -ENOTSUPP;
	}

	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
	tname = btf_name_by_offset(btf_vmlinux, t->name_off);

	/* Gated by perfmon_capable(): unpriv programs must not see map internals. */
	if (!env->allow_ptr_to_map_access) {
		verbose(env,
			"%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
			tname);
		return -EPERM;
	}

	if (off < 0) {
		verbose(env, "R%d is %s invalid negative access: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}

	/* Map fields are read-only from bpf programs. */
	if (atype != BPF_READ) {
		verbose(env, "only read from %s is supported\n", tname);
		return -EACCES;
	}

	/* Delegate field/offset validation to the generic BTF walker. */
	ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
	if (ret < 0)
		return ret;

	if (value_regno >= 0)
		mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);

	return 0;
}
31973258

3259+
31983260
/* check whether memory at (regno + off) is accessible for t = (read | write)
31993261
* if t==write, value_regno is a register which value is stored into memory
32003262
* if t==read, value_regno is a register which will receive the value from memory
@@ -3363,6 +3425,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
33633425
} else if (reg->type == PTR_TO_BTF_ID) {
33643426
err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
33653427
value_regno);
3428+
} else if (reg->type == CONST_PTR_TO_MAP) {
3429+
err = check_ptr_to_map_access(env, regs, regno, off, size, t,
3430+
value_regno);
33663431
} else {
33673432
verbose(env, "R%d invalid mem access '%s'\n", regno,
33683433
reg_type_str[reg->type]);
@@ -10951,6 +11016,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
1095111016
env->strict_alignment = false;
1095211017

1095311018
env->allow_ptr_leaks = bpf_allow_ptr_leaks();
11019+
env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
1095411020
env->bypass_spec_v1 = bpf_bypass_spec_v1();
1095511021
env->bypass_spec_v4 = bpf_bypass_spec_v4();
1095611022
env->bpf_capable = bpf_capable();

tools/testing/selftests/bpf/verifier/map_ptr_mixing.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
.fixup_map_in_map = { 16 },
5757
.fixup_map_array_48b = { 13 },
5858
.result = REJECT,
59-
.errstr = "R0 invalid mem access 'map_ptr'",
59+
.errstr = "only read from bpf_array is supported",
6060
},
6161
{
6262
"cond: two branches returning different map pointers for lookup (tail, tail)",

0 commit comments

Comments
 (0)