Currently, `HAVE_JUMP_LABEL_BATCH` provides an architecture-level mechanism to defer instruction synchronization (`text_poke_sync()`) when patching a sequence of static keys. However, this deferred batching capability is not exposed as a public kernel API. Subsystems that need to toggle a large number of static keys (e.g., dynamic_debug) currently suffer from O(N) overhead due to repeated machine-wide synchronizations (stop_machine).
This patch introduces a public queueing API to expose this deferred synchronization mechanism to the rest of the kernel. This allows multiple static keys to be enabled/disabled by queueing their architecture-level updates, then applying a single machine-wide synchronization barrier after all the instructions have been modified.

The new API consists of:

 - static_key_enable_queued(key)
 - static_key_disable_queued(key)
 - static_key_apply_queued()  (the global barrier/flush)
 - static_branch_enable_queued(x) / static_branch_disable_queued(x) macros

NOTES:

The '_queued' API suffix was chosen to match the underlying 'arch_jump_label_transform_queue' and to avoid confusion with the existing rate-limited 'static_key_deferred' API. Also unify the names under the 'static_key_*' prefix, renaming jump_label_apply_queued() to static_key_apply_queued() (with a compatibility macro) for consistency.

A pr_debug() is added to show the poked addresses; this exposed the semi-random ordering coming from dynamic_debug, despite its ordered descriptors. So arch/x86/kernel/alternative.c gets new code to do an insertion sort, by memcpy & memmove after appending. This sorting yields a dramatic IPI reduction; a following patch to dynamic_debug uses the API and includes the numbers. 
Cc: Jason Baron <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Josh Poimboeuf <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Alice Ryhl <[email protected]> Cc: Steven Rostedt <[email protected]> Cc: Ard Biesheuvel <[email protected]> Cc: Alexandre Chartre <[email protected]> Cc: Juergen Gross <[email protected]> Cc: Andy Lutomirski <[email protected]> Signed-off-by: Jim Cromie <[email protected]> --- arch/Kconfig | 3 + arch/x86/Kconfig | 1 + arch/x86/kernel/alternative.c | 50 +++++++++----- arch/x86/kernel/jump_label.c | 13 +++- include/linux/jump_label.h | 24 +++++++ kernel/jump_label.c | 125 +++++++++++++++++++++++++++++++--- 6 files changed, 186 insertions(+), 30 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 102ddbd4298e..388a73545005 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -505,6 +505,9 @@ config HAVE_ARCH_JUMP_LABEL config HAVE_ARCH_JUMP_LABEL_RELATIVE bool +config HAVE_JUMP_LABEL_BATCH + bool + config MMU_GATHER_TABLE_FREE bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e2df1b147184..4d7705890558 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -249,6 +249,7 @@ config X86 select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_JUMP_LABEL_BATCH select HAVE_JUMP_LABEL_HACK if HAVE_OBJTOOL select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a888ae0f01fb..85df82c36543 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -3137,26 +3137,19 @@ static void __smp_text_poke_batch_add(void *addr, const void *opcode, size_t len } /* - * We hard rely on the text_poke_array.vec being ordered; ensure this is so by flushing - * early if needed. + * We hard rely on the text_poke_array.vec being ordered; ensure this + * by finding where to insert to preserve the order, and mem-moving + * into place after appending it. 
*/ -static bool text_poke_addr_ordered(void *addr) +static int text_poke_get_insert_idx(void *addr) { - WARN_ON_ONCE(!addr); + int i; - if (!text_poke_array.nr_entries) - return true; - - /* - * If the last current entry's address is higher than the - * new entry's address we'd like to add, then ordering - * is violated and we must first flush all pending patching - * requests: - */ - if (text_poke_addr(text_poke_array.vec + text_poke_array.nr_entries-1) > addr) - return false; - - return true; + for (i = 0; i < text_poke_array.nr_entries; i++) { + if (text_poke_addr(&text_poke_array.vec[i]) > addr) + return i; + } + return text_poke_array.nr_entries; } /** @@ -3174,9 +3167,30 @@ static bool text_poke_addr_ordered(void *addr) */ void __ref smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, const void *emulate) { - if (text_poke_array.nr_entries == TEXT_POKE_ARRAY_MAX || !text_poke_addr_ordered(addr)) + int insert_idx; + + pr_debug("incoming addr=%px, current_qlen=%d\n", + addr, text_poke_array.nr_entries); + + if (text_poke_array.nr_entries == TEXT_POKE_ARRAY_MAX) smp_text_poke_batch_finish(); + + insert_idx = text_poke_get_insert_idx(addr); __smp_text_poke_batch_add(addr, opcode, len, emulate); + + if (insert_idx < text_poke_array.nr_entries - 1) { + struct smp_text_poke_loc tmp; + int last = text_poke_array.nr_entries - 1; + /* Copy the newly appended item out */ + memcpy(&tmp, &text_poke_array.vec[last], sizeof(tmp)); + + /* Shift everything from insert_idx over by 1 */ + memmove(&text_poke_array.vec[insert_idx + 1], + &text_poke_array.vec[insert_idx], + (last - insert_idx) * sizeof(struct smp_text_poke_loc)); + /* Drop the new item into its sorted home */ + memcpy(&text_poke_array.vec[insert_idx], &tmp, sizeof(tmp)); + } } /** diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index a7949a54a0ff..6b5bab5f34e8 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -120,6 +120,8 @@ void 
arch_jump_label_transform(struct jump_entry *entry, jump_label_transform(entry, type, 0); } +static int jump_label_queue_len; + bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type) { @@ -135,14 +137,23 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, mutex_lock(&text_mutex); jlp = __jump_label_patch(entry, type); - smp_text_poke_batch_add((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL); + smp_text_poke_batch_add((void *)jump_entry_code(entry), + jlp.code, jlp.size, NULL); + jump_label_queue_len++; mutex_unlock(&text_mutex); return true; } void arch_jump_label_transform_apply(void) { + if (!jump_label_queue_len) { + pr_debug("no queued jump_labels to apply\n"); + return; + } + + pr_debug("applying %d queued jump_labels\n", jump_label_queue_len); mutex_lock(&text_mutex); smp_text_poke_batch_finish(); + jump_label_queue_len = 0; mutex_unlock(&text_mutex); } diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index fdb79dd1ebd8..17f572abe4bb 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -234,10 +234,20 @@ extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); extern void static_key_disable(struct static_key *key); +extern void static_key_enable_queued(struct static_key *key); +extern void static_key_disable_queued(struct static_key *key); +extern void static_key_apply_queued(void); extern void static_key_enable_cpuslocked(struct static_key *key); extern void static_key_disable_cpuslocked(struct static_key *key); extern enum jump_label_type jump_label_init_type(struct jump_entry *entry); +#define static_branch_enable(x) static_key_enable(&(x)->key) +#define static_branch_disable(x) static_key_disable(&(x)->key) +#define static_branch_enable_queued(x) static_key_enable_queued(&(x)->key) +#define static_branch_disable_queued(x) 
static_key_disable_queued(&(x)->key) +#define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key) +#define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key) + /* * We should be using ATOMIC_INIT() for initializing .enabled, but * the inclusion of atomic.h is problematic for inclusion of jump_label.h @@ -340,6 +350,18 @@ static inline void static_key_disable(struct static_key *key) atomic_set(&key->enabled, 0); } +static inline void static_key_enable_queued(struct static_key *key) +{ + static_key_enable(key); +} + +static inline void static_key_disable_queued(struct static_key *key) +{ + static_key_disable(key); +} + +static inline void static_key_apply_queued(void) {} + #define static_key_enable_cpuslocked(k) static_key_enable((k)) #define static_key_disable_cpuslocked(k) static_key_disable((k)) @@ -535,6 +557,8 @@ extern bool ____wrong_branch_error(void); #define static_branch_enable(x) static_key_enable(&(x)->key) #define static_branch_disable(x) static_key_disable(&(x)->key) +#define static_branch_enable_queued(x) static_key_enable_queued(&(x)->key) +#define static_branch_disable_queued(x) static_key_disable_queued(&(x)->key) #define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key) #define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 7cb19e601426..76a0f4e68b73 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -91,6 +91,7 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) } static void jump_label_update(struct static_key *key); +static void jump_label_update_queued(struct static_key *key); /* * There are similar definitions for the !CONFIG_JUMP_LABEL case in jump_label.h. 
@@ -250,6 +251,41 @@ void static_key_disable(struct static_key *key) } EXPORT_SYMBOL_GPL(static_key_disable); +void static_key_enable_queued(struct static_key *key) +{ + STATIC_KEY_CHECK_USE(key); + + if (atomic_read(&key->enabled) > 0) { + WARN_ON_ONCE(atomic_read(&key->enabled) != 1); + return; + } + + jump_label_lock(); + if (atomic_read(&key->enabled) == 0) { + atomic_set(&key->enabled, -1); + jump_label_update_queued(key); + atomic_set_release(&key->enabled, 1); + } + jump_label_unlock(); +} +EXPORT_SYMBOL_GPL(static_key_enable_queued); + +void static_key_disable_queued(struct static_key *key) +{ + STATIC_KEY_CHECK_USE(key); + + if (atomic_read(&key->enabled) != 1) { + WARN_ON_ONCE(atomic_read(&key->enabled) != 0); + return; + } + + jump_label_lock(); + if (atomic_cmpxchg(&key->enabled, 1, 0) == 1) + jump_label_update_queued(key); + jump_label_unlock(); +} +EXPORT_SYMBOL_GPL(static_key_disable_queued); + static bool static_key_dec_not_one(struct static_key *key) { int v; @@ -488,39 +524,59 @@ static bool jump_label_can_update(struct jump_entry *entry, bool init) return true; } -#ifndef HAVE_JUMP_LABEL_BATCH static void __jump_label_update(struct static_key *key, struct jump_entry *entry, struct jump_entry *stop, bool init) { +#ifndef HAVE_JUMP_LABEL_BATCH for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { if (jump_label_can_update(entry, init)) arch_jump_label_transform(entry, jump_label_type(entry)); } -} #else -static void __jump_label_update(struct static_key *key, - struct jump_entry *entry, - struct jump_entry *stop, - bool init) -{ for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { if (!jump_label_can_update(entry, init)) continue; if (!arch_jump_label_transform_queue(entry, jump_label_type(entry))) { - /* - * Queue is full: Apply the current queue and try again. 
- */ arch_jump_label_transform_apply(); - BUG_ON(!arch_jump_label_transform_queue(entry, jump_label_type(entry))); + WARN_ON_ONCE(!arch_jump_label_transform_queue(entry, jump_label_type(entry))); } } arch_jump_label_transform_apply(); +#endif } + +static void __jump_label_update_queued(struct static_key *key, + struct jump_entry *entry, + struct jump_entry *stop, + bool init) +{ +#ifdef HAVE_JUMP_LABEL_BATCH + for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { + + if (!jump_label_can_update(entry, init)) + continue; + + if (!arch_jump_label_transform_queue(entry, jump_label_type(entry))) { + arch_jump_label_transform_apply(); + WARN_ON_ONCE(!arch_jump_label_transform_queue(entry, jump_label_type(entry))); + } + } +#else + __jump_label_update(key, entry, stop, init); +#endif +} + +void static_key_apply_queued(void) +{ +#ifdef HAVE_JUMP_LABEL_BATCH + arch_jump_label_transform_apply(); #endif +} +EXPORT_SYMBOL_GPL(static_key_apply_queued); void __init jump_label_init(void) { @@ -696,6 +752,27 @@ static void __jump_label_mod_update(struct static_key *key) } } +static void __jump_label_mod_update_queued(struct static_key *key) +{ + struct static_key_mod *mod; + + for (mod = static_key_mod(key); mod; mod = mod->next) { + struct jump_entry *stop; + struct module *m; + + if (!mod->entries) + continue; + + m = mod->mod; + if (!m) + stop = __stop___jump_table; + else + stop = m->jump_entries + m->num_jump_entries; + __jump_label_update_queued(key, mod->entries, stop, + m && m->state == MODULE_STATE_COMING); + } +} + static int jump_label_add_module(struct module *mod) { struct jump_entry *iter_start = mod->jump_entries; @@ -919,6 +996,32 @@ static void jump_label_update(struct static_key *key) __jump_label_update(key, entry, stop, init); } +static void jump_label_update_queued(struct static_key *key) +{ + struct jump_entry *stop = __stop___jump_table; + bool init = system_state < SYSTEM_RUNNING; + struct jump_entry *entry; +#ifdef CONFIG_MODULES + struct 
module *mod; + + if (static_key_linked(key)) { + __jump_label_mod_update_queued(key); + return; + } + + scoped_guard(rcu) { + mod = __module_address((unsigned long)key); + if (mod) { + stop = mod->jump_entries + mod->num_jump_entries; + init = mod->state == MODULE_STATE_COMING; + } + } +#endif + entry = static_key_entries(key); + if (entry) + __jump_label_update_queued(key, entry, stop, init); +} + #ifdef CONFIG_STATIC_KEYS_SELFTEST static DEFINE_STATIC_KEY_TRUE(sk_true); static DEFINE_STATIC_KEY_FALSE(sk_false); -- 2.53.0
