Skip to content

Commit dee872e

Browse files
kkdwivedi authored and Alexei Starovoitov committed
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF ID sets in the struct btf itself. The various kernel subsystems will make register_btf_kfunc_id_set call in the initcalls (for built-in code and modules). The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS, STRUCT_OPS, and 'types' are check (allowed or not), acquire, release, and ret_null (with PTR_TO_BTF_ID_OR_NULL return type). A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a set of certain hook and type for vmlinux sets, since they are allocated on demand, and otherwise set as NULL. Module sets can only be registered once per hook and type, hence they are directly assigned. A new btf_kfunc_id_set_contains function is exposed for use in verifier, this new method is faster than the existing list searching method, and is also automatic. It also lets other code not care whether the set is unallocated or not. Note that module code can only do single register_btf_kfunc_id_set call per hook. This is why sorting is only done for in-kernel vmlinux sets, because there might be multiple sets for the same hook and type that must be concatenated, hence sorting them is required to ensure bsearch in btf_id_set_contains continues to work correctly. Next commit will update the kernel users to make use of this infrastructure. Finally, add __maybe_unused annotation for BTF ID macros for the !CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during build time. The previous patch is also needed to provide synchronization against initialization for module BTF's kfunc_set_tab introduced here, as described below: The kfunc_set_tab pointer in struct btf is write-once (if we consider the registration phase (comprised of multiple register_btf_kfunc_id_set calls) as a single operation). In this sense, once it has been fully prepared, it isn't modified, only used for lookup (from the verifier context). 
For btf_vmlinux, it is initialized fully during the do_initcalls phase, which happens fairly early in the boot process, before any processes are present. This also eliminates the possibility of bpf_check being called at that point, thus relieving us of ensuring any synchronization between the registration and lookup function (btf_kfunc_id_set_contains). However, the case for module BTF is a bit tricky. The BTF is parsed, prepared, and published from the MODULE_STATE_COMING notifier callback. After this, the module initcalls are invoked, where our registration function will be called to populate the kfunc_set_tab for module BTF. At this point, BTF may be available to userspace while its corresponding module is still initializing. A BTF fd can then be passed to verifier using bpf syscall (e.g. for kfunc call insn). Hence, there is a race window where verifier may concurrently try to look up the kfunc_set_tab. To prevent this race, we must ensure the operations are serialized, or wait for the __init functions to complete. In the earlier registration API, this race was alleviated as verifier bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added by the registration function (called usually at the end of module __init function after all module resources have been initialized). If the verifier made the check_kfunc_call before kfunc BTF ID was added to the list, it would fail verification (saying call isn't allowed). The access to the list was protected using a mutex. Now, it would still fail verification, but for a different reason (returning ENXIO due to the failed btf_try_get_module call in add_kfunc_call), because if the __init call is in progress the module will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE transition, and the BTF_MODULE_LIVE flag for btf_module instance will not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 18688de commit dee872e

File tree

3 files changed

+289
-7
lines changed

3 files changed

+289
-7
lines changed

include/linux/btf.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,33 @@
1212
#define BTF_TYPE_EMIT(type) ((void)(type *)0)
1313
#define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val)
1414

15+
/* Kinds of kfunc BTF ID sets that can be registered for a hook.
 * Per the commit message: check (call allowed or not), acquire, release,
 * and ret_null (PTR_TO_BTF_ID_OR_NULL return type).
 */
enum btf_kfunc_type {
	BTF_KFUNC_TYPE_CHECK,
	BTF_KFUNC_TYPE_ACQUIRE,
	BTF_KFUNC_TYPE_RELEASE,
	BTF_KFUNC_TYPE_RET_NULL,
	BTF_KFUNC_TYPE_MAX,
};

struct btf;
struct btf_member;
struct btf_type;
union bpf_attr;
struct btf_show;
struct btf_id_set;

/* Set of kfunc BTF ID sets a subsystem registers for one hook.
 * The anonymous union lets callers use named members (check_set, ...)
 * while btf_populate_kfunc_set() iterates sets[] indexed by
 * enum btf_kfunc_type. owner is NULL for vmlinux, else THIS_MODULE.
 */
struct btf_kfunc_id_set {
	struct module *owner;
	union {
		struct {
			struct btf_id_set *check_set;
			struct btf_id_set *acquire_set;
			struct btf_id_set *release_set;
			struct btf_id_set *ret_null_set;
		};
		struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX];
	};
};
2042

2143
extern const struct file_operations btf_fops;
2244

@@ -307,6 +329,11 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
307329
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
308330
struct btf *btf_parse_vmlinux(void);
309331
struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
332+
/* Lookup (verifier side) and registration (initcall side) API for kfunc
 * BTF ID sets; see kernel/bpf/btf.c for the definitions.
 */
bool btf_kfunc_id_set_contains(const struct btf *btf,
			       enum bpf_prog_type prog_type,
			       enum btf_kfunc_type type, u32 kfunc_btf_id);
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
			      const struct btf_kfunc_id_set *s);
310337
#else
311338
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
312339
u32 type_id)
@@ -318,6 +345,18 @@ static inline const char *btf_name_by_offset(const struct btf *btf,
318345
{
319346
return NULL;
320347
}
348+
static inline bool btf_kfunc_id_set_contains(const struct btf *btf,
349+
enum bpf_prog_type prog_type,
350+
enum btf_kfunc_type type,
351+
u32 kfunc_btf_id)
352+
{
353+
return false;
354+
}
355+
static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
356+
const struct btf_kfunc_id_set *s)
357+
{
358+
return 0;
359+
}
321360
#endif
322361

323362
struct kfunc_btf_id_set {

include/linux/btf_ids.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ struct btf_id_set {
1111
#ifdef CONFIG_DEBUG_INFO_BTF
1212

1313
#include <linux/compiler.h> /* for __PASTE */
14+
#include <linux/compiler_attributes.h> /* for __maybe_unused */
1415

1516
/*
1617
* Following macros help to define lists of BTF IDs placed
@@ -146,14 +147,14 @@ extern struct btf_id_set name;
146147

147148
#else
148149

149-
/* !CONFIG_DEBUG_INFO_BTF fallbacks: plain dummy arrays/sets instead of
 * linker-section magic. __maybe_unused suppresses -Wunused warnings for
 * lists/sets that are then never referenced in this configuration.
 */
#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
#define BTF_ID(prefix, name)
#define BTF_ID_UNUSED
#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n];
#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1];
#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1];
#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 };
#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 };
#define BTF_SET_END(name)
158159

159160
#endif /* CONFIG_DEBUG_INFO_BTF */

kernel/bpf/btf.c

Lines changed: 243 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,21 @@
198198
DEFINE_IDR(btf_idr);
199199
DEFINE_SPINLOCK(btf_idr_lock);
200200

201+
enum btf_kfunc_hook {
202+
BTF_KFUNC_HOOK_XDP,
203+
BTF_KFUNC_HOOK_TC,
204+
BTF_KFUNC_HOOK_STRUCT_OPS,
205+
BTF_KFUNC_HOOK_MAX,
206+
};
207+
208+
enum {
209+
BTF_KFUNC_SET_MAX_CNT = 32,
210+
};
211+
212+
struct btf_kfunc_set_tab {
213+
struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX];
214+
};
215+
201216
struct btf {
202217
void *data;
203218
struct btf_type **types;
@@ -212,6 +227,7 @@ struct btf {
212227
refcount_t refcnt;
213228
u32 id;
214229
struct rcu_head rcu;
230+
struct btf_kfunc_set_tab *kfunc_set_tab;
215231

216232
/* split BTF support */
217233
struct btf *base_btf;
@@ -1531,8 +1547,30 @@ static void btf_free_id(struct btf *btf)
15311547
spin_unlock_irqrestore(&btf_idr_lock, flags);
15321548
}
15331549

1550+
static void btf_free_kfunc_set_tab(struct btf *btf)
1551+
{
1552+
struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab;
1553+
int hook, type;
1554+
1555+
if (!tab)
1556+
return;
1557+
/* For module BTF, we directly assign the sets being registered, so
1558+
* there is nothing to free except kfunc_set_tab.
1559+
*/
1560+
if (btf_is_module(btf))
1561+
goto free_tab;
1562+
for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) {
1563+
for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++)
1564+
kfree(tab->sets[hook][type]);
1565+
}
1566+
free_tab:
1567+
kfree(tab);
1568+
btf->kfunc_set_tab = NULL;
1569+
}
1570+
15341571
static void btf_free(struct btf *btf)
15351572
{
1573+
btf_free_kfunc_set_tab(btf);
15361574
kvfree(btf->types);
15371575
kvfree(btf->resolved_sizes);
15381576
kvfree(btf->resolved_ids);
@@ -6371,6 +6409,36 @@ struct module *btf_try_get_module(const struct btf *btf)
63716409
return res;
63726410
}
63736411

6412+
/* Returns struct btf corresponding to the struct module
6413+
*
6414+
* This function can return NULL or ERR_PTR. Note that caller must
6415+
* release reference for struct btf iff btf_is_module is true.
6416+
*/
6417+
static struct btf *btf_get_module_btf(const struct module *module)
6418+
{
6419+
struct btf *btf = NULL;
6420+
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
6421+
struct btf_module *btf_mod, *tmp;
6422+
#endif
6423+
6424+
if (!module)
6425+
return bpf_get_btf_vmlinux();
6426+
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
6427+
mutex_lock(&btf_module_mutex);
6428+
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
6429+
if (btf_mod->module != module)
6430+
continue;
6431+
6432+
btf_get(btf_mod->btf);
6433+
btf = btf_mod->btf;
6434+
break;
6435+
}
6436+
mutex_unlock(&btf_module_mutex);
6437+
#endif
6438+
6439+
return btf;
6440+
}
6441+
63746442
BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
63756443
{
63766444
struct btf *btf;
@@ -6438,7 +6506,181 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
64386506
BTF_TRACING_TYPE_xxx
64396507
#undef BTF_TRACING_TYPE
64406508

6441-
/* BTF ID set registration API for modules */
6509+
/* Kernel Function (kfunc) BTF ID set registration API */
6510+
6511+
/* Add one BTF ID set to btf's kfunc_set_tab for (hook, type).
 *
 * Module sets (vmlinux_set == false) are assigned directly (one per hook
 * and type); vmlinux sets are concatenated into an on-demand allocation
 * and re-sorted so btf_id_set_contains() can keep using bsearch.
 * On error the whole kfunc_set_tab is torn down and an -errno returned.
 */
static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
				    enum btf_kfunc_type type,
				    struct btf_id_set *add_set, bool vmlinux_set)
{
	struct btf_kfunc_set_tab *tab;
	struct btf_id_set *set;
	u32 set_cnt;
	int ret;

	if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) {
		ret = -EINVAL;
		goto end;
	}

	/* Empty set: nothing to record */
	if (!add_set->cnt)
		return 0;

	/* Allocate the table lazily on first registration */
	tab = btf->kfunc_set_tab;
	if (!tab) {
		tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
		if (!tab)
			return -ENOMEM;
		btf->kfunc_set_tab = tab;
	}

	set = tab->sets[hook][type];
	/* Warn when register_btf_kfunc_id_set is called twice for the same hook
	 * for module sets.
	 */
	if (WARN_ON_ONCE(set && !vmlinux_set)) {
		ret = -EINVAL;
		goto end;
	}

	/* We don't need to allocate, concatenate, and sort module sets, because
	 * only one is allowed per hook. Hence, we can directly assign the
	 * pointer and return.
	 */
	if (!vmlinux_set) {
		tab->sets[hook][type] = add_set;
		return 0;
	}

	/* In case of vmlinux sets, there may be more than one set being
	 * registered per hook. To create a unified set, we allocate a new set
	 * and concatenate all individual sets being registered. While each set
	 * is individually sorted, they may become unsorted when concatenated,
	 * hence re-sorting the final set again is required to make binary
	 * searching the set using btf_id_set_contains function work.
	 */
	set_cnt = set ? set->cnt : 0;

	if (set_cnt > U32_MAX - add_set->cnt) {
		ret = -EOVERFLOW;
		goto end;
	}

	if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) {
		ret = -E2BIG;
		goto end;
	}

	/* Grow set */
	set = krealloc(tab->sets[hook][type],
		       offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]),
		       GFP_KERNEL | __GFP_NOWARN);
	if (!set) {
		ret = -ENOMEM;
		goto end;
	}

	/* For newly allocated set, initialize set->cnt to 0 */
	if (!tab->sets[hook][type])
		set->cnt = 0;
	tab->sets[hook][type] = set;

	/* Concatenate the two sets */
	memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0]));
	set->cnt += add_set->cnt;

	sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL);

	return 0;
end:
	btf_free_kfunc_set_tab(btf);
	return ret;
}
6598+
6599+
static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
6600+
const struct btf_kfunc_id_set *kset)
6601+
{
6602+
bool vmlinux_set = !btf_is_module(btf);
6603+
int type, ret;
6604+
6605+
for (type = 0; type < ARRAY_SIZE(kset->sets); type++) {
6606+
if (!kset->sets[type])
6607+
continue;
6608+
6609+
ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set);
6610+
if (ret)
6611+
break;
6612+
}
6613+
return ret;
6614+
}
6615+
6616+
static bool __btf_kfunc_id_set_contains(const struct btf *btf,
6617+
enum btf_kfunc_hook hook,
6618+
enum btf_kfunc_type type,
6619+
u32 kfunc_btf_id)
6620+
{
6621+
struct btf_id_set *set;
6622+
6623+
if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX)
6624+
return false;
6625+
if (!btf->kfunc_set_tab)
6626+
return false;
6627+
set = btf->kfunc_set_tab->sets[hook][type];
6628+
if (!set)
6629+
return false;
6630+
return btf_id_set_contains(set, kfunc_btf_id);
6631+
}
6632+
6633+
static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
6634+
{
6635+
switch (prog_type) {
6636+
case BPF_PROG_TYPE_XDP:
6637+
return BTF_KFUNC_HOOK_XDP;
6638+
case BPF_PROG_TYPE_SCHED_CLS:
6639+
return BTF_KFUNC_HOOK_TC;
6640+
case BPF_PROG_TYPE_STRUCT_OPS:
6641+
return BTF_KFUNC_HOOK_STRUCT_OPS;
6642+
default:
6643+
return BTF_KFUNC_HOOK_MAX;
6644+
}
6645+
}
6646+
6647+
/* Caution:
6648+
* Reference to the module (obtained using btf_try_get_module) corresponding to
6649+
* the struct btf *MUST* be held when calling this function from verifier
6650+
* context. This is usually true as we stash references in prog's kfunc_btf_tab;
6651+
* keeping the reference for the duration of the call provides the necessary
6652+
* protection for looking up a well-formed btf->kfunc_set_tab.
6653+
*/
6654+
bool btf_kfunc_id_set_contains(const struct btf *btf,
6655+
enum bpf_prog_type prog_type,
6656+
enum btf_kfunc_type type, u32 kfunc_btf_id)
6657+
{
6658+
enum btf_kfunc_hook hook;
6659+
6660+
hook = bpf_prog_type_to_kfunc_hook(prog_type);
6661+
return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id);
6662+
}
6663+
6664+
/* This function must be invoked only from initcalls/module init functions */
6665+
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
6666+
const struct btf_kfunc_id_set *kset)
6667+
{
6668+
enum btf_kfunc_hook hook;
6669+
struct btf *btf;
6670+
int ret;
6671+
6672+
btf = btf_get_module_btf(kset->owner);
6673+
if (IS_ERR_OR_NULL(btf))
6674+
return btf ? PTR_ERR(btf) : -ENOENT;
6675+
6676+
hook = bpf_prog_type_to_kfunc_hook(prog_type);
6677+
ret = btf_populate_kfunc_set(btf, hook, kset);
6678+
/* reference is only taken for module BTF */
6679+
if (btf_is_module(btf))
6680+
btf_put(btf);
6681+
return ret;
6682+
}
6683+
EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
64426684

64436685
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
64446686

0 commit comments

Comments
 (0)