From: Peter Zijlstra <[email protected]>

kallsym_tree is based on rbtree_latch. It is designed to hold dynamic
kernel symbols such as BPF programs, ftrace kallsyms, etc.

Lookups walk the latch tree locklessly under RCU, while insertions and
removals are serialized by kallsym_lock. Adding or removing a symbol
also emits a PERF_RECORD_KSYMBOL event via perf_event_ksymbol().
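A minimal usage sketch (illustration only, not part of this patch): a
hypothetical user embeds a struct kallsym_node, fills in the address
range, the ksym_type and a name callback, then registers it with
kallsym_tree_add(). The my_jit_* names are made up, and
PERF_RECORD_KSYMBOL_TYPE_BPF is assumed to be provided by the companion
perf_event patch.

#include <linux/kallsyms.h>
#include <linux/kernel.h>	/* container_of() */
#include <linux/string.h>	/* strlcpy() */

struct my_jit_image {			/* hypothetical JIT image */
	struct kallsym_node knode;
	char name[KSYM_NAME_LEN];
};

static void my_jit_kn_names(struct kallsym_node *kn, char *sym_name,
			    char **mod_name)
{
	struct my_jit_image *img = container_of(kn, struct my_jit_image, knode);

	*mod_name = NULL;		/* not backed by a module */
	strlcpy(sym_name, img->name, KSYM_NAME_LEN);
}

static void my_jit_publish(struct my_jit_image *img, void *text,
			   unsigned long len)
{
	img->knode.kn_addr = (unsigned long)text;
	img->knode.kn_len = len;
	img->knode.ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF; /* assumed enum */
	img->knode.kn_names = my_jit_kn_names;

	/* symbol becomes visible to kallsyms lookups and perf */
	kallsym_tree_add(&img->knode);
}

static void my_jit_unpublish(struct my_jit_image *img)
{
	/* unpublish before the underlying text is freed */
	kallsym_tree_del(&img->knode);
}

Presumably the BPF and ftrace call sites would follow this same
embed-a-node pattern when converted to the new interface.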
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Song Liu <[email protected]>
---
 include/linux/kallsyms.h |  16 ++++
 kernel/extable.c         |   2 +
 kernel/kallsyms.c        | 188 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 205 insertions(+), 1 deletion(-)

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 657a83b943f0..be83ac3d8228 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -11,6 +11,8 @@
 #include <linux/stddef.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/rbtree_latch.h>
+#include <uapi/linux/perf_event.h>
 
 #include <asm/sections.h>
 
@@ -20,6 +22,20 @@
 
 struct module;
 
+struct kallsym_node
+{
+	struct latch_tree_node kn_node;
+	unsigned long kn_addr;
+	unsigned long kn_len;
+	enum perf_record_ksymbol_type ksym_type;
+	void (*kn_names)(struct kallsym_node *kn, char *sym_name, char **mod_name);
+};
+
+extern void kallsym_tree_add(struct kallsym_node *kn);
+extern void kallsym_tree_del(struct kallsym_node *kn);
+
+extern bool is_kallsym_tree_text_address(unsigned long addr);
+
 static inline int is_kernel_inittext(unsigned long addr)
 {
 	if (addr >= (unsigned long)_sinittext
diff --git a/kernel/extable.c b/kernel/extable.c
index 6a5b61ebc66c..5271e9b649b1 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -145,6 +145,8 @@ int kernel_text_address(unsigned long addr)
 
 	if (is_module_text_address(addr))
 		goto out;
+	if (is_kallsym_tree_text_address(addr))
+		goto out;
 	if (is_ftrace_trampoline(addr))
 		goto out;
 	if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 14934afa9e68..30611a5379fd 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -24,6 +24,8 @@
 #include <linux/filter.h>
 #include <linux/ftrace.h>
 #include <linux/compiler.h>
+#include <linux/spinlock.h>
+#include <linux/perf_event.h>
 
 /*
  * These will be re-linked against their real values
@@ -48,6 +50,165 @@ extern const u16 kallsyms_token_index[] __weak;
 
 extern const unsigned int kallsyms_markers[] __weak;
 
+static DEFINE_SPINLOCK(kallsym_lock);
+static struct latch_tree_root kallsym_tree __cacheline_aligned;
+
+static __always_inline unsigned long
+kallsym_node_addr(struct latch_tree_node *node)
+{
+	struct kallsym_node *kn;
+
+	kn = container_of(node, struct kallsym_node, kn_node);
+	return kn->kn_addr;
+}
+
+static __always_inline bool kallsym_tree_less(struct latch_tree_node *a,
+					      struct latch_tree_node *b)
+{
+	return kallsym_node_addr(a) < kallsym_node_addr(b);
+}
+
+static __always_inline int kallsym_tree_comp(void *key,
+					     struct latch_tree_node *n)
+{
+	unsigned long val = (unsigned long)key;
+	unsigned long sym_start, sym_end;
+	const struct kallsym_node *kn;
+
+	kn = container_of(n, struct kallsym_node, kn_node);
+	sym_start = kn->kn_addr;
+	sym_end = sym_start + kn->kn_len;
+
+	if (val < sym_start)
+		return -1;
+	if (val >= sym_end)
+		return 1;
+
+	return 0;
+}
+
+static const struct latch_tree_ops kallsym_tree_ops = {
+	.less = kallsym_tree_less,
+	.comp = kallsym_tree_comp,
+};
+
+void kallsym_tree_add(struct kallsym_node *kn)
+{
+	char namebuf[KSYM_NAME_LEN] = "";
+	char *modname = NULL;
+
+	spin_lock_irq(&kallsym_lock);
+	latch_tree_insert(&kn->kn_node, &kallsym_tree, &kallsym_tree_ops);
+	spin_unlock_irq(&kallsym_lock);
+
+	kn->kn_names(kn, namebuf, &modname);
+
+	if (modname) {
+		int len = strlen(namebuf);
+
+		snprintf(namebuf + len, sizeof(namebuf) - len, " [%s]", modname);
+	}
+
+	perf_event_ksymbol(kn->ksym_type, kn->kn_addr, kn->kn_len, false, namebuf);
+}
+
+void kallsym_tree_del(struct kallsym_node *kn)
+{
+	char namebuf[KSYM_NAME_LEN] = "";
+	char *modname = NULL;
+
+	kn->kn_names(kn, namebuf, &modname);
+
+	if (modname) {
+		int len = strlen(namebuf);
+
+		snprintf(namebuf + len, sizeof(namebuf) - len, " [%s]", modname);
+	}
+
+	perf_event_ksymbol(kn->ksym_type, kn->kn_addr, kn->kn_len, true, namebuf);
+
+	spin_lock_irq(&kallsym_lock);
+	latch_tree_erase(&kn->kn_node, &kallsym_tree, &kallsym_tree_ops);
+	spin_unlock_irq(&kallsym_lock);
+}
+
+static struct kallsym_node *kallsym_tree_find(unsigned long addr)
+{
+	struct kallsym_node *kn = NULL;
+	struct latch_tree_node *n;
+
+	n = latch_tree_find((void *)addr, &kallsym_tree, &kallsym_tree_ops);
+	if (n)
+		kn = container_of(n, struct kallsym_node, kn_node);
+
+	return kn;
+}
+
+static char *kallsym_tree_address_lookup(unsigned long addr, unsigned long *size,
+					 unsigned long *off, char **modname, char *sym)
+{
+	struct kallsym_node *kn;
+	char *ret = NULL;
+
+	rcu_read_lock();
+	kn = kallsym_tree_find(addr);
+	if (kn) {
+		kn->kn_names(kn, sym, modname);
+
+		ret = sym;
+		if (size)
+			*size = kn->kn_len;
+		if (off)
+			*off = addr - kn->kn_addr;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+bool is_kallsym_tree_text_address(unsigned long addr)
+{
+	bool ret;
+
+	rcu_read_lock();
+	ret = kallsym_tree_find(addr) != NULL;
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static int kallsym_tree_kallsym(unsigned int symnum, unsigned long *value, char *type,
+				char *sym, char *modname, int *exported)
+{
+	struct latch_tree_node *ltn;
+	int i, ret = -ERANGE;
+
+	rcu_read_lock();
+	for (i = 0, ltn = latch_tree_first(&kallsym_tree); i < symnum && ltn;
+	     i++, ltn = latch_tree_next(&kallsym_tree, ltn))
+		;
+
+	if (ltn) {
+		struct kallsym_node *kn;
+		char *mod;
+
+		kn = container_of(ltn, struct kallsym_node, kn_node);
+
+		kn->kn_names(kn, sym, &mod);
+		if (mod)
+			strlcpy(modname, mod, MODULE_NAME_LEN);
+		else
+			modname[0] = '\0';
+
+		*type = 't';
+		*exported = 0;
+		ret = 0;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
 /*
  * Expand a compressed symbol data into the resulting uncompressed string,
  * if uncompressed string is too long (>= maxlen), it will be truncated,
@@ -265,6 +426,7 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
 	if (is_ksym_addr(addr))
 		return !!get_symbol_pos(addr, symbolsize, offset);
 	return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf) ||
+	       !!kallsym_tree_address_lookup(addr, symbolsize, offset, NULL, namebuf) ||
 	       !!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
 }
 
@@ -300,6 +462,10 @@ const char *kallsyms_lookup(unsigned long addr,
 	/* See if it's in a module or a BPF JITed image. */
 	ret = module_address_lookup(addr, symbolsize, offset,
 				    modname, namebuf);
+	if (!ret)
+		ret = kallsym_tree_address_lookup(addr, symbolsize,
+						  offset, modname, namebuf);
+
 	if (!ret)
 		ret = bpf_address_lookup(addr, symbolsize, offset,
 					 modname, namebuf);
@@ -434,6 +600,7 @@ struct kallsym_iter {
 	loff_t pos;
 	loff_t pos_arch_end;
 	loff_t pos_mod_end;
+	loff_t pos_tree_end;
 	loff_t pos_ftrace_mod_end;
 	unsigned long value;
 	unsigned int nameoff; /* If iterating in core kernel symbols. */
@@ -478,9 +645,24 @@ static int get_ksymbol_mod(struct kallsym_iter *iter)
 	return 1;
 }
 
+static int get_ksymbol_tree(struct kallsym_iter *iter)
+{
+	int ret = kallsym_tree_kallsym(iter->pos - iter->pos_mod_end,
+				       &iter->value, &iter->type,
+				       iter->name, iter->module_name,
+				       &iter->exported);
+
+	if (ret < 0) {
+		iter->pos_tree_end = iter->pos;
+		return 0;
+	}
+
+	return 1;
+}
+
 static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter)
 {
-	int ret = ftrace_mod_get_kallsym(iter->pos - iter->pos_mod_end,
+	int ret = ftrace_mod_get_kallsym(iter->pos - iter->pos_tree_end,
 					 &iter->value, &iter->type,
 					 iter->name, iter->module_name,
 					 &iter->exported);
@@ -545,6 +727,10 @@ static int update_iter_mod(struct kallsym_iter *iter, loff_t pos)
 	    get_ksymbol_mod(iter))
 		return 1;
 
+	if ((!iter->pos_tree_end || iter->pos_tree_end > pos) &&
+	    get_ksymbol_tree(iter))
+		return 1;
+
 	if ((!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > pos) &&
 	    get_ksymbol_ftrace_mod(iter))
 		return 1;
-- 
2.17.1
