commit:     9291c0c0c06fefce13e8b3de296acf45d239d0e8
Author:     Arisu Tachibana <alicef <AT> gentoo <DOT> org>
AuthorDate: Mon Nov  3 01:31:15 2025 +0000
Commit:     Arisu Tachibana <alicef <AT> gentoo <DOT> org>
CommitDate: Mon Nov  3 01:31:15 2025 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=9291c0c0

Linux patch 6.17.7

Signed-off-by: Arisu Tachibana <alicef <AT> gentoo.org>

 0000_README             |    4 +
 1006_linux-6.17.7.patch | 3343 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 3347 insertions(+)

diff --git a/0000_README b/0000_README
index 24a12c6c..6ab66ab4 100644
--- a/0000_README
+++ b/0000_README
@@ -67,6 +67,10 @@ Patch:  1005_linux-6.17.6.patch
 From:   https://www.kernel.org
 Desc:   Linux 6.17.6
 
+Patch:  1006_linux-6.17.7.patch
+From:   https://www.kernel.org
+Desc:   Linux 6.17.7
+
 Patch:  1510_fs-enable-link-security-restrictions-by-default.patch
 From:   http://sources.debian.net/src/linux/3.16.7-ckt4-3/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch/
 Desc:   Enable link security restrictions by default.

diff --git a/1006_linux-6.17.7.patch b/1006_linux-6.17.7.patch
new file mode 100644
index 00000000..2130d13c
--- /dev/null
+++ b/1006_linux-6.17.7.patch
@@ -0,0 +1,3343 @@
+diff --git a/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
+index 5964901d66e317..d0bdbd81dcf9f2 100644
+--- a/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
++++ b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
+@@ -218,6 +218,7 @@ SRSO                  X              X            X             X
+ SSB                                  X
+ TAA                   X              X            X              X            *       (Note 2)
+ TSA                   X              X            X              X
++VMSCAPE                                           X
+ =============== ============== ============ ============= ============== ============ ========
+ 
+ Notes:
+diff --git a/Makefile b/Makefile
+index d090c7c253e8d3..570042d208fd3e 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 6
+ PATCHLEVEL = 17
+-SUBLEVEL = 6
++SUBLEVEL = 7
+ EXTRAVERSION =
+ NAME = Baby Opossum Posse
+ 
+diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c
+index e9dad60b147f33..1ebb058904992b 100644
+--- a/arch/alpha/kernel/asm-offsets.c
++++ b/arch/alpha/kernel/asm-offsets.c
+@@ -4,6 +4,7 @@
+  * This code generates raw asm output which is post-processed to extract
+  * and format the required data.
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <linux/types.h>
+ #include <linux/stddef.h>
+diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
+index f77deb7991757e..2978da85fcb65b 100644
+--- a/arch/arc/kernel/asm-offsets.c
++++ b/arch/arc/kernel/asm-offsets.c
+@@ -2,6 +2,7 @@
+ /*
+  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
+index 123f4a8ef44660..2101938d27fcbc 100644
+--- a/arch/arm/kernel/asm-offsets.c
++++ b/arch/arm/kernel/asm-offsets.c
+@@ -7,6 +7,8 @@
+  * This code generates raw asm output which is post-processed to extract
+  * and format the required data.
+  */
++#define COMPILE_OFFSETS
++
+ #include <linux/compiler.h>
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
+index 30d4bbe68661f4..b6367ff3a49ca1 100644
+--- a/arch/arm64/kernel/asm-offsets.c
++++ b/arch/arm64/kernel/asm-offsets.c
+@@ -6,6 +6,7 @@
+  *               2001-2002 Keith Owens
+  * Copyright (C) 2012 ARM Ltd.
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <linux/arm_sdei.h>
+ #include <linux/sched.h>
+diff --git a/arch/csky/kernel/asm-offsets.c b/arch/csky/kernel/asm-offsets.c
+index d1e9035794733d..5525c8e7e1d9ea 100644
+--- a/arch/csky/kernel/asm-offsets.c
++++ b/arch/csky/kernel/asm-offsets.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0
+ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
++#define COMPILE_OFFSETS
+ 
+ #include <linux/sched.h>
+ #include <linux/kernel_stat.h>
+diff --git a/arch/hexagon/kernel/asm-offsets.c b/arch/hexagon/kernel/asm-offsets.c
+index 03a7063f945614..50eea9fa6f1375 100644
+--- a/arch/hexagon/kernel/asm-offsets.c
++++ b/arch/hexagon/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+  *
+  * Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <linux/compat.h>
+ #include <linux/types.h>
+diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
+index db1e4bb26b6a01..3017c715760099 100644
+--- a/arch/loongarch/kernel/asm-offsets.c
++++ b/arch/loongarch/kernel/asm-offsets.c
+@@ -4,6 +4,8 @@
+  *
+  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+  */
++#define COMPILE_OFFSETS
++
+ #include <linux/types.h>
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c
+index 906d7323053744..67a1990f9d748f 100644
+--- a/arch/m68k/kernel/asm-offsets.c
++++ b/arch/m68k/kernel/asm-offsets.c
+@@ -9,6 +9,7 @@
+  * #defines from the assembly-language output.
+  */
+ 
++#define COMPILE_OFFSETS
+ #define ASM_OFFSETS_C
+ 
+ #include <linux/stddef.h>
+diff --git a/arch/microblaze/kernel/asm-offsets.c b/arch/microblaze/kernel/asm-offsets.c
+index 104c3ac5f30c88..b4b67d58e7f6ae 100644
+--- a/arch/microblaze/kernel/asm-offsets.c
++++ b/arch/microblaze/kernel/asm-offsets.c
+@@ -7,6 +7,7 @@
+  * License. See the file "COPYING" in the main directory of this archive
+  * for more details.
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <linux/init.h>
+ #include <linux/stddef.h>
+diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
+index 1e29efcba46e57..5debd9a3854a9e 100644
+--- a/arch/mips/kernel/asm-offsets.c
++++ b/arch/mips/kernel/asm-offsets.c
+@@ -9,6 +9,8 @@
+  * Kevin Kissell, [email protected] and Carsten Langgaard, [email protected]
+  * Copyright (C) 2000 MIPS Technologies, Inc.
+  */
++#define COMPILE_OFFSETS
++
+ #include <linux/compat.h>
+ #include <linux/types.h>
+ #include <linux/sched.h>
+diff --git a/arch/nios2/kernel/asm-offsets.c b/arch/nios2/kernel/asm-offsets.c
+index e3d9b7b6fb48aa..88190b503ce5de 100644
+--- a/arch/nios2/kernel/asm-offsets.c
++++ b/arch/nios2/kernel/asm-offsets.c
+@@ -2,6 +2,7 @@
+ /*
+  * Copyright (C) 2011 Tobias Klauser <[email protected]>
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <linux/stddef.h>
+ #include <linux/sched.h>
+diff --git a/arch/openrisc/kernel/asm-offsets.c b/arch/openrisc/kernel/asm-offsets.c
+index 710651d5aaae10..3cc826f2216b10 100644
+--- a/arch/openrisc/kernel/asm-offsets.c
++++ b/arch/openrisc/kernel/asm-offsets.c
+@@ -18,6 +18,7 @@
+  * compile this file to assembler, and then extract the
+  * #defines from the assembly-language output.
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <linux/signal.h>
+ #include <linux/sched.h>
+diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
+index 757816a7bd4b28..9abfe65492c65e 100644
+--- a/arch/parisc/kernel/asm-offsets.c
++++ b/arch/parisc/kernel/asm-offsets.c
+@@ -13,6 +13,7 @@
+  *    Copyright (C) 2002 Randolph Chung <tausq with parisc-linux.org>
+  *    Copyright (C) 2003 James Bottomley <jejb at parisc-linux.org>
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <linux/types.h>
+ #include <linux/sched.h>
+diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
+index b3048f6d3822c0..a4bc80b30410ae 100644
+--- a/arch/powerpc/kernel/asm-offsets.c
++++ b/arch/powerpc/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+  * compile this file to assembler, and then extract the
+  * #defines from the assembly-language output.
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <linux/compat.h>
+ #include <linux/signal.h>
+diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
+index 6e8c0d6feae9e9..7d42d3b8a32a75 100644
+--- a/arch/riscv/kernel/asm-offsets.c
++++ b/arch/riscv/kernel/asm-offsets.c
+@@ -3,6 +3,7 @@
+  * Copyright (C) 2012 Regents of the University of California
+  * Copyright (C) 2017 SiFive
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <linux/kbuild.h>
+ #include <linux/mm.h>
+diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
+index 95ecad9c7d7d27..a8915663e917fa 100644
+--- a/arch/s390/kernel/asm-offsets.c
++++ b/arch/s390/kernel/asm-offsets.c
+@@ -4,6 +4,7 @@
+  * This code generates raw asm output which is post-processed to extract
+  * and format the required data.
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <linux/kbuild.h>
+ #include <linux/sched.h>
+diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c
+index a0322e8328456e..429b6a76314684 100644
+--- a/arch/sh/kernel/asm-offsets.c
++++ b/arch/sh/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+  * compile this file to assembler, and then extract the
+  * #defines from the assembly-language output.
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <linux/stddef.h>
+ #include <linux/types.h>
+diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c
+index 3d9b9855dce917..6e660bde48dd89 100644
+--- a/arch/sparc/kernel/asm-offsets.c
++++ b/arch/sparc/kernel/asm-offsets.c
+@@ -10,6 +10,7 @@
+  *
+  * On sparc, thread_info data is static and TI_XXX offsets are computed by hand.
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <linux/sched.h>
+ #include <linux/mm_types.h>
+diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c
+index 1fb12235ab9c84..a69873aa697f4f 100644
+--- a/arch/um/kernel/asm-offsets.c
++++ b/arch/um/kernel/asm-offsets.c
+@@ -1 +1,3 @@
++#define COMPILE_OFFSETS
++
+ #include <sysdep/kernel-offsets.h>
+diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
+index 15da60cf69f20c..046d12281fd94e 100644
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -2845,8 +2845,8 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
+ {
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+-      u64 mask, bits = 0;
+       int idx = hwc->idx;
++      u64 bits = 0;
+ 
+       if (is_topdown_idx(idx)) {
+               struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+@@ -2885,14 +2885,10 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
+ 
+       idx -= INTEL_PMC_IDX_FIXED;
+       bits = intel_fixed_bits_by_idx(idx, bits);
+-      mask = intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK);
+-
+-      if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
++      if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip)
+               bits |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE);
+-              mask |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE);
+-      }
+ 
+-      cpuc->fixed_ctrl_val &= ~mask;
++      cpuc->fixed_ctrl_val &= ~intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK);
+       cpuc->fixed_ctrl_val |= bits;
+ }
+ 
+diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
+index 70d1d94aca7e63..ee943bd1595af5 100644
+--- a/arch/x86/include/asm/perf_event.h
++++ b/arch/x86/include/asm/perf_event.h
+@@ -35,7 +35,6 @@
+ #define ARCH_PERFMON_EVENTSEL_EQ                      (1ULL << 36)
+ #define ARCH_PERFMON_EVENTSEL_UMASK2                  (0xFFULL << 40)
+ 
+-#define INTEL_FIXED_BITS_MASK                         0xFULL
+ #define INTEL_FIXED_BITS_STRIDE                       4
+ #define INTEL_FIXED_0_KERNEL                          (1ULL << 0)
+ #define INTEL_FIXED_0_USER                            (1ULL << 1)
+@@ -48,6 +47,11 @@
+ #define ICL_EVENTSEL_ADAPTIVE                         (1ULL << 34)
+ #define ICL_FIXED_0_ADAPTIVE                          (1ULL << 32)
+ 
++#define INTEL_FIXED_BITS_MASK                                 \
++      (INTEL_FIXED_0_KERNEL | INTEL_FIXED_0_USER |            \
++       INTEL_FIXED_0_ANYTHREAD | INTEL_FIXED_0_ENABLE_PMI |   \
++       ICL_FIXED_0_ADAPTIVE)
++
+ #define intel_fixed_bits_by_idx(_idx, _bits)                  \
+       ((_bits) << ((_idx) * INTEL_FIXED_BITS_STRIDE))
+ 
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 36dcfc5105be9a..26ece97011fd7e 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -434,6 +434,9 @@ static bool __init should_mitigate_vuln(unsigned int bug)
+       case X86_BUG_SPEC_STORE_BYPASS:
+               return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER);
+ 
++      case X86_BUG_VMSCAPE:
++              return cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST);
++
+       default:
+               WARN(1, "Unknown bug %x\n", bug);
+               return false;
+@@ -1460,8 +1463,12 @@ static void __init retbleed_update_mitigation(void)
+                       retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
+                       break;
+               default:
+-                      if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF)
+-                              pr_err(RETBLEED_INTEL_MSG);
++                      if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) {
++                              if (retbleed_mitigation != RETBLEED_MITIGATION_NONE)
++                                      pr_err(RETBLEED_INTEL_MSG);
++
++                              retbleed_mitigation = RETBLEED_MITIGATION_NONE;
++                      }
+               }
+       }
+ 
+@@ -2045,7 +2052,7 @@ static void __init spectre_v2_user_apply_mitigation(void)
+ static const char * const spectre_v2_strings[] = {
+       [SPECTRE_V2_NONE]                       = "Vulnerable",
+       [SPECTRE_V2_RETPOLINE]                  = "Mitigation: Retpolines",
+-      [SPECTRE_V2_LFENCE]                     = "Mitigation: LFENCE",
++      [SPECTRE_V2_LFENCE]                     = "Vulnerable: LFENCE",
+       [SPECTRE_V2_EIBRS]                      = "Mitigation: Enhanced / Automatic IBRS",
+       [SPECTRE_V2_EIBRS_LFENCE]               = "Mitigation: Enhanced / Automatic IBRS + LFENCE",
+       [SPECTRE_V2_EIBRS_RETPOLINE]            = "Mitigation: Enhanced / Automatic IBRS + Retpolines",
+@@ -3304,15 +3311,18 @@ early_param("vmscape", vmscape_parse_cmdline);
+ 
+ static void __init vmscape_select_mitigation(void)
+ {
+-      if (cpu_mitigations_off() ||
+-          !boot_cpu_has_bug(X86_BUG_VMSCAPE) ||
++      if (!boot_cpu_has_bug(X86_BUG_VMSCAPE) ||
+           !boot_cpu_has(X86_FEATURE_IBPB)) {
+               vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+               return;
+       }
+ 
+-      if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO)
+-              vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
++      if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO) {
++              if (should_mitigate_vuln(X86_BUG_VMSCAPE))
++                      vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
++              else
++                      vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
++      }
+ }
+ 
+ static void __init vmscape_update_mitigation(void)
+@@ -3626,9 +3636,6 @@ static const char *spectre_bhi_state(void)
+ 
+ static ssize_t spectre_v2_show_state(char *buf)
+ {
+-      if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+-              return sysfs_emit(buf, "Vulnerable: LFENCE\n");
+-
+       if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+               return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
+ 
+diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
+index ad89d0bd600581..103604c4b33b58 100644
+--- a/arch/x86/kvm/pmu.h
++++ b/arch/x86/kvm/pmu.h
+@@ -13,7 +13,7 @@
+ #define MSR_IA32_MISC_ENABLE_PMU_RO_MASK (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | \
+                                         MSR_IA32_MISC_ENABLE_BTS_UNAVAIL)
+ 
+-/* retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL */
++/* retrieve a fixed counter bits out of IA32_FIXED_CTR_CTRL */
+ #define fixed_ctrl_field(ctrl_reg, idx) \
+       (((ctrl_reg) >> ((idx) * INTEL_FIXED_BITS_STRIDE)) & INTEL_FIXED_BITS_MASK)
+ 
+diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
+index da38de20ae598b..cfbced95e944a4 100644
+--- a/arch/xtensa/kernel/asm-offsets.c
++++ b/arch/xtensa/kernel/asm-offsets.c
+@@ -11,6 +11,7 @@
+  *
+  * Chris Zankel <[email protected]>
+  */
++#define COMPILE_OFFSETS
+ 
+ #include <asm/processor.h>
+ #include <asm/coprocessor.h>
+diff --git a/drivers/edac/ecs.c b/drivers/edac/ecs.c
+old mode 100755
+new mode 100644
+diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
+index 0f338adf7d9376..8689631f190536 100644
+--- a/drivers/edac/edac_mc_sysfs.c
++++ b/drivers/edac/edac_mc_sysfs.c
+@@ -305,6 +305,14 @@ DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR,
+       channel_dimm_label_show, channel_dimm_label_store, 10);
+ DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR,
+       channel_dimm_label_show, channel_dimm_label_store, 11);
++DEVICE_CHANNEL(ch12_dimm_label, S_IRUGO | S_IWUSR,
++      channel_dimm_label_show, channel_dimm_label_store, 12);
++DEVICE_CHANNEL(ch13_dimm_label, S_IRUGO | S_IWUSR,
++      channel_dimm_label_show, channel_dimm_label_store, 13);
++DEVICE_CHANNEL(ch14_dimm_label, S_IRUGO | S_IWUSR,
++      channel_dimm_label_show, channel_dimm_label_store, 14);
++DEVICE_CHANNEL(ch15_dimm_label, S_IRUGO | S_IWUSR,
++      channel_dimm_label_show, channel_dimm_label_store, 15);
+ 
+ /* Total possible dynamic DIMM Label attribute file table */
+ static struct attribute *dynamic_csrow_dimm_attr[] = {
+@@ -320,6 +328,10 @@ static struct attribute *dynamic_csrow_dimm_attr[] = {
+       &dev_attr_legacy_ch9_dimm_label.attr.attr,
+       &dev_attr_legacy_ch10_dimm_label.attr.attr,
+       &dev_attr_legacy_ch11_dimm_label.attr.attr,
++      &dev_attr_legacy_ch12_dimm_label.attr.attr,
++      &dev_attr_legacy_ch13_dimm_label.attr.attr,
++      &dev_attr_legacy_ch14_dimm_label.attr.attr,
++      &dev_attr_legacy_ch15_dimm_label.attr.attr,
+       NULL
+ };
+ 
+@@ -348,6 +360,14 @@ DEVICE_CHANNEL(ch10_ce_count, S_IRUGO,
+                  channel_ce_count_show, NULL, 10);
+ DEVICE_CHANNEL(ch11_ce_count, S_IRUGO,
+                  channel_ce_count_show, NULL, 11);
++DEVICE_CHANNEL(ch12_ce_count, S_IRUGO,
++                 channel_ce_count_show, NULL, 12);
++DEVICE_CHANNEL(ch13_ce_count, S_IRUGO,
++                 channel_ce_count_show, NULL, 13);
++DEVICE_CHANNEL(ch14_ce_count, S_IRUGO,
++                 channel_ce_count_show, NULL, 14);
++DEVICE_CHANNEL(ch15_ce_count, S_IRUGO,
++                 channel_ce_count_show, NULL, 15);
+ 
+ /* Total possible dynamic ce_count attribute file table */
+ static struct attribute *dynamic_csrow_ce_count_attr[] = {
+@@ -363,6 +383,10 @@ static struct attribute *dynamic_csrow_ce_count_attr[] = {
+       &dev_attr_legacy_ch9_ce_count.attr.attr,
+       &dev_attr_legacy_ch10_ce_count.attr.attr,
+       &dev_attr_legacy_ch11_ce_count.attr.attr,
++      &dev_attr_legacy_ch12_ce_count.attr.attr,
++      &dev_attr_legacy_ch13_ce_count.attr.attr,
++      &dev_attr_legacy_ch14_ce_count.attr.attr,
++      &dev_attr_legacy_ch15_ce_count.attr.attr,
+       NULL
+ };
+ 
+diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c
+index 5c1fa1c0d12e3c..5a080ab65476da 100644
+--- a/drivers/edac/ie31200_edac.c
++++ b/drivers/edac/ie31200_edac.c
+@@ -99,6 +99,8 @@
+ 
+ /* Alder Lake-S */
+ #define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1   0x4660
++#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_2   0x4668  /* 8P+4E, e.g. i7-12700K */
++#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_3   0x4648  /* 6P+4E, e.g. i5-12600K */
+ 
+ /* Bartlett Lake-S */
+ #define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_1   0x4639
+@@ -761,6 +763,8 @@ static const struct pci_device_id ie31200_pci_tbl[] = {
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_6), (kernel_ulong_t)&rpl_s_cfg},
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_HX_1), (kernel_ulong_t)&rpl_s_cfg},
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1), (kernel_ulong_t)&rpl_s_cfg},
++      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_2), (kernel_ulong_t)&rpl_s_cfg},
++      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_3), (kernel_ulong_t)&rpl_s_cfg},
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_1), (kernel_ulong_t)&rpl_s_cfg},
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_2), (kernel_ulong_t)&rpl_s_cfg},
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_3), (kernel_ulong_t)&rpl_s_cfg},
+diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c
+old mode 100755
+new mode 100644
+diff --git a/drivers/edac/scrub.c b/drivers/edac/scrub.c
+old mode 100755
+new mode 100644
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 70fc4e7cc5a0e6..0b02e36b30558e 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2087,10 +2087,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
+ 
+       /* returns with log_tree_root freed on success */
+       ret = btrfs_recover_log_trees(log_tree_root);
++      btrfs_put_root(log_tree_root);
+       if (ret) {
+               btrfs_handle_fs_error(fs_info, ret,
+                                     "Failed to recover log tree");
+-              btrfs_put_root(log_tree_root);
+               return ret;
+       }
+ 
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 97d517cdf2df75..682d21a73a67a4 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -4297,7 +4297,8 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
+ }
+ 
+ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
+-                                  struct find_free_extent_ctl *ffe_ctl)
++                                  struct find_free_extent_ctl *ffe_ctl,
++                                  struct btrfs_space_info *space_info)
+ {
+       if (ffe_ctl->for_treelog) {
+               spin_lock(&fs_info->treelog_bg_lock);
+@@ -4321,6 +4322,7 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
+                       u64 avail = block_group->zone_capacity - block_group->alloc_offset;
+ 
+                       if (block_group_bits(block_group, ffe_ctl->flags) &&
++                          block_group->space_info == space_info &&
+                           avail >= ffe_ctl->num_bytes) {
+                               ffe_ctl->hint_byte = block_group->start;
+                               break;
+@@ -4342,7 +4344,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
+               return prepare_allocation_clustered(fs_info, ffe_ctl,
+                                                   space_info, ins);
+       case BTRFS_EXTENT_ALLOC_ZONED:
+-              return prepare_allocation_zoned(fs_info, ffe_ctl);
++              return prepare_allocation_zoned(fs_info, ffe_ctl, space_info);
+       default:
+               BUG();
+       }
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 4031cbdea07400..41da405181b4f0 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3107,9 +3107,10 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
+               goto out;
+       }
+ 
+-      if (btrfs_is_zoned(fs_info))
+-              btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
+-                                      ordered_extent->disk_num_bytes);
++      ret = btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
++                                    ordered_extent->disk_num_bytes);
++      if (ret)
++              goto out;
+ 
+       if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
+               truncated = true;
+diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
+index 6776e6ab8d1080..fd4c1ca34b5e47 100644
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -1369,8 +1369,7 @@ static void scrub_throttle_dev_io(struct scrub_ctx *sctx, struct btrfs_device *d
+        * Slice is divided into intervals when the IO is submitted, adjust by
+        * bwlimit and maximum of 64 intervals.
+        */
+-      div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024)));
+-      div = min_t(u32, 64, div);
++      div = clamp(bwlimit / (16 * 1024 * 1024), 1, 64);
+ 
+       /* Start new epoch, set deadline */
+       now = ktime_get();
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index c5c0d9cf1a8088..a4e486a600bed0 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1806,7 +1806,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       }
+       /* see comments in should_cow_block() */
+       set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+-      smp_wmb();
++      smp_mb__after_atomic();
+ 
+       btrfs_set_root_node(new_root_item, tmp);
+       /* record when the snapshot was created in key.offset */
+diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
+index a997c7cc35a26f..30bc8eb28005cd 100644
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -183,6 +183,7 @@ static bool check_prev_ino(struct extent_buffer *leaf,
+       /* Only these key->types needs to be checked */
+       ASSERT(key->type == BTRFS_XATTR_ITEM_KEY ||
+              key->type == BTRFS_INODE_REF_KEY ||
++             key->type == BTRFS_INODE_EXTREF_KEY ||
+              key->type == BTRFS_DIR_INDEX_KEY ||
+              key->type == BTRFS_DIR_ITEM_KEY ||
+              key->type == BTRFS_EXTENT_DATA_KEY);
+@@ -1782,6 +1783,39 @@ static int check_inode_ref(struct extent_buffer *leaf,
+       return 0;
+ }
+ 
++static int check_inode_extref(struct extent_buffer *leaf,
++                            struct btrfs_key *key, struct btrfs_key *prev_key,
++                            int slot)
++{
++      unsigned long ptr = btrfs_item_ptr_offset(leaf, slot);
++      unsigned long end = ptr + btrfs_item_size(leaf, slot);
++
++      if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
++              return -EUCLEAN;
++
++      while (ptr < end) {
++              struct btrfs_inode_extref *extref = (struct btrfs_inode_extref *)ptr;
++              u16 namelen;
++
++              if (unlikely(ptr + sizeof(*extref) > end)) {
++                      inode_ref_err(leaf, slot,
++                      "inode extref overflow, ptr %lu end %lu inode_extref size %zu",
++                                    ptr, end, sizeof(*extref));
++                      return -EUCLEAN;
++              }
++
++              namelen = btrfs_inode_extref_name_len(leaf, extref);
++              if (unlikely(ptr + sizeof(*extref) + namelen > end)) {
++                      inode_ref_err(leaf, slot,
++                              "inode extref overflow, ptr %lu end %lu namelen %u",
++                              ptr, end, namelen);
++                      return -EUCLEAN;
++              }
++              ptr += sizeof(*extref) + namelen;
++      }
++      return 0;
++}
++
+ static int check_raid_stripe_extent(const struct extent_buffer *leaf,
+                                   const struct btrfs_key *key, int slot)
+ {
+@@ -1893,6 +1927,9 @@ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf,
+       case BTRFS_INODE_REF_KEY:
+               ret = check_inode_ref(leaf, key, prev_key, slot);
+               break;
++      case BTRFS_INODE_EXTREF_KEY:
++              ret = check_inode_extref(leaf, key, prev_key, slot);
++              break;
+       case BTRFS_BLOCK_GROUP_ITEM_KEY:
+               ret = check_block_group_item(leaf, key, slot);
+               break;
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 7a63afedd01e6e..165d2ee500ca3b 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -347,6 +347,7 @@ static int process_one_buffer(struct btrfs_root *log,
+                             struct extent_buffer *eb,
+                             struct walk_control *wc, u64 gen, int level)
+ {
++      struct btrfs_trans_handle *trans = wc->trans;
+       struct btrfs_fs_info *fs_info = log->fs_info;
+       int ret = 0;
+ 
+@@ -361,18 +362,29 @@ static int process_one_buffer(struct btrfs_root *log,
+               };
+ 
+               ret = btrfs_read_extent_buffer(eb, &check);
+-              if (ret)
++              if (ret) {
++                      if (trans)
++                              btrfs_abort_transaction(trans, ret);
++                      else
++                              btrfs_handle_fs_error(fs_info, ret, NULL);
+                       return ret;
++              }
+       }
+ 
+       if (wc->pin) {
+-              ret = btrfs_pin_extent_for_log_replay(wc->trans, eb);
+-              if (ret)
++              ASSERT(trans != NULL);
++              ret = btrfs_pin_extent_for_log_replay(trans, eb);
++              if (ret) {
++                      btrfs_abort_transaction(trans, ret);
+                       return ret;
++              }
+ 
+               if (btrfs_buffer_uptodate(eb, gen, 0) &&
+-                  btrfs_header_level(eb) == 0)
++                  btrfs_header_level(eb) == 0) {
+                       ret = btrfs_exclude_logged_extents(eb);
++                      if (ret)
++                              btrfs_abort_transaction(trans, ret);
++              }
+       }
+       return ret;
+ }
+@@ -1784,6 +1796,8 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
+               else
+                       inc_nlink(vfs_inode);
+               ret = btrfs_update_inode(trans, inode);
++              if (ret)
++                      btrfs_abort_transaction(trans, ret);
+       } else if (ret == -EEXIST) {
+               ret = 0;
+       }
+@@ -2449,15 +2463,13 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
+       int i;
+       int ret;
+ 
++      if (level != 0)
++              return 0;
++
+       ret = btrfs_read_extent_buffer(eb, &check);
+       if (ret)
+               return ret;
+ 
+-      level = btrfs_header_level(eb);
+-
+-      if (level != 0)
+-              return 0;
+-
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+@@ -2630,15 +2642,24 @@ static int unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start)
+ static int clean_log_buffer(struct btrfs_trans_handle *trans,
+                           struct extent_buffer *eb)
+ {
++      int ret;
++
+       btrfs_tree_lock(eb);
+       btrfs_clear_buffer_dirty(trans, eb);
+       wait_on_extent_buffer_writeback(eb);
+       btrfs_tree_unlock(eb);
+ 
+-      if (trans)
+-              return btrfs_pin_reserved_extent(trans, eb);
++      if (trans) {
++              ret = btrfs_pin_reserved_extent(trans, eb);
++              if (ret)
++                      btrfs_abort_transaction(trans, ret);
++              return ret;
++      }
+ 
+-      return unaccount_log_buffer(eb->fs_info, eb->start);
++      ret = unaccount_log_buffer(eb->fs_info, eb->start);
++      if (ret)
++              btrfs_handle_fs_error(eb->fs_info, ret, NULL);
++      return ret;
+ }
+ 
+ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
+@@ -2674,8 +2695,14 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
+               next = btrfs_find_create_tree_block(fs_info, bytenr,
+                                                   btrfs_header_owner(cur),
+                                                   *level - 1);
+-              if (IS_ERR(next))
+-                      return PTR_ERR(next);
++              if (IS_ERR(next)) {
++                      ret = PTR_ERR(next);
++                      if (trans)
++                              btrfs_abort_transaction(trans, ret);
++                      else
++                              btrfs_handle_fs_error(fs_info, ret, NULL);
++                      return ret;
++              }
+ 
+               if (*level == 1) {
+                       ret = wc->process_func(root, next, wc, ptr_gen,
+@@ -2690,6 +2717,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
+                               ret = btrfs_read_extent_buffer(next, &check);
+                               if (ret) {
+                                       free_extent_buffer(next);
++                                      if (trans)
++                                              btrfs_abort_transaction(trans, ret);
++                                      else
++                                              btrfs_handle_fs_error(fs_info, ret, NULL);
+                                       return ret;
+                               }
+ 
+@@ -2705,6 +2736,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
+               ret = btrfs_read_extent_buffer(next, &check);
+               if (ret) {
+                       free_extent_buffer(next);
++                      if (trans)
++                              btrfs_abort_transaction(trans, ret);
++                      else
++                              btrfs_handle_fs_error(fs_info, ret, NULL);
+                       return ret;
+               }
+ 
+@@ -7434,7 +7469,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
+ 
+       log_root_tree->log_root = NULL;
+       clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
+-      btrfs_put_root(log_root_tree);
+ 
+       return 0;
+ error:
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 87c5dd3ad016e4..fcdf7b058a584c 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2464,16 +2464,17 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
+       return ret;
+ }
+ 
+-void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
++int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
+ {
+       struct btrfs_block_group *block_group;
+       u64 min_alloc_bytes;
+ 
+       if (!btrfs_is_zoned(fs_info))
+-              return;
++              return 0;
+ 
+       block_group = btrfs_lookup_block_group(fs_info, logical);
+-      ASSERT(block_group);
++      if (WARN_ON_ONCE(!block_group))
++              return -ENOENT;
+ 
+       /* No MIXED_BG on zoned btrfs. */
+       if (block_group->flags & BTRFS_BLOCK_GROUP_DATA)
+@@ -2490,6 +2491,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
+ 
+ out:
+       btrfs_put_block_group(block_group);
++      return 0;
+ }
+ 
+ static void btrfs_zone_finish_endio_workfn(struct work_struct *work)
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index 6e11533b8e14c2..17c5656580dd97 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -83,7 +83,7 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
+ bool btrfs_zone_activate(struct btrfs_block_group *block_group);
+ int btrfs_zone_finish(struct btrfs_block_group *block_group);
+ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags);
+-void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
++int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
+                            u64 length);
+ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+                                  struct extent_buffer *eb);
+@@ -234,8 +234,11 @@ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
+       return true;
+ }
+ 
+-static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
+-                                         u64 logical, u64 length) { }
++static inline int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
++                                         u64 logical, u64 length)
++{
++      return 0;
++}
+ 
+ static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+                                                struct extent_buffer *eb) { }
+diff --git a/include/linux/audit.h b/include/linux/audit.h
+index a394614ccd0b81..e3f06eba9c6e6e 100644
+--- a/include/linux/audit.h
++++ b/include/linux/audit.h
+@@ -527,7 +527,7 @@ static inline void audit_log_kern_module(const char *name)
+ 
+ static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar)
+ {
+-      if (!audit_dummy_context())
++      if (audit_enabled)
+               __audit_fanotify(response, friar);
+ }
+ 
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index fef93032fe7e4d..fd890b34a84038 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -1728,11 +1728,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
+               if (prstate_housekeeping_conflict(new_prs, xcpus))
+                       return PERR_HKEEPING;
+ 
+-              /*
+-               * A parent can be left with no CPU as long as there is no
+-               * task directly associated with the parent partition.
+-               */
+-              if (nocpu)
++              if (tasks_nocpu_error(parent, cs, xcpus))
+                       return PERR_NOCPUS;
+ 
+               /*
+diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
+index 6c83ad674d0104..2609998ca07f1f 100644
+--- a/kernel/events/callchain.c
++++ b/kernel/events/callchain.c
+@@ -224,6 +224,10 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+       struct perf_callchain_entry_ctx ctx;
+       int rctx, start_entry_idx;
+ 
++      /* crosstask is not supported for user stacks */
++      if (crosstask && user && !kernel)
++              return NULL;
++
+       entry = get_callchain_entry(&rctx);
+       if (!entry)
+               return NULL;
+@@ -240,18 +244,15 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+               perf_callchain_kernel(&ctx, regs);
+       }
+ 
+-      if (user) {
++      if (user && !crosstask) {
+               if (!user_mode(regs)) {
+-                      if  (current->mm)
+-                              regs = task_pt_regs(current);
+-                      else
++                      if (current->flags & (PF_KTHREAD | PF_USER_WORKER))
+                               regs = NULL;
++                      else
++                              regs = task_pt_regs(current);
+               }
+ 
+               if (regs) {
+-                      if (crosstask)
+-                              goto exit_put;
+-
+                       if (add_mark)
+                               perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
+ 
+@@ -261,7 +262,6 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+               }
+       }
+ 
+-exit_put:
+       put_callchain_entry(rctx);
+ 
+       return entry;
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 6e9427c4aaff70..c0e938d28758f5 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -7440,7 +7440,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user,
+       if (user_mode(regs)) {
+               regs_user->abi = perf_reg_abi(current);
+               regs_user->regs = regs;
+-      } else if (!(current->flags & PF_KTHREAD)) {
++      } else if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+               perf_get_regs_user(regs_user, regs);
+       } else {
+               regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
+@@ -8080,7 +8080,7 @@ static u64 perf_virt_to_phys(u64 virt)
+                * Try IRQ-safe get_user_page_fast_only first.
+                * If failed, leave phys_addr as 0.
+                */
+-              if (current->mm != NULL) {
++              if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+                       struct page *p;
+ 
+                       pagefault_disable();
+@@ -8192,7 +8192,8 @@ struct perf_callchain_entry *
+ perf_callchain(struct perf_event *event, struct pt_regs *regs)
+ {
+       bool kernel = !event->attr.exclude_callchain_kernel;
+-      bool user   = !event->attr.exclude_callchain_user;
++      bool user   = !event->attr.exclude_callchain_user &&
++              !(current->flags & (PF_KTHREAD | PF_USER_WORKER));
+       /* Disallow cross-task user callchains. */
+       bool crosstask = event->ctx->task && event->ctx->task != current;
+       const u32 max_stack = event->attr.sample_max_stack;
+diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
+index 3ffa0d80ddd19c..d1917b28761a33 100644
+--- a/kernel/irq/chip.c
++++ b/kernel/irq/chip.c
+@@ -1030,7 +1030,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
+ void __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
+                      const char *name)
+ {
+-      scoped_irqdesc_get_and_lock(irq, 0)
++      scoped_irqdesc_get_and_buslock(irq, 0)
+               __irq_do_set_handler(scoped_irqdesc, handle, is_chained, name);
+ }
+ EXPORT_SYMBOL_GPL(__irq_set_handler);
+diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
+index c94837382037e4..400856abf67219 100644
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -659,7 +659,7 @@ void __disable_irq(struct irq_desc *desc)
+ 
+ static int __disable_irq_nosync(unsigned int irq)
+ {
+-      scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
++      scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
+               __disable_irq(scoped_irqdesc);
+               return 0;
+       }
+@@ -789,7 +789,7 @@ void __enable_irq(struct irq_desc *desc)
+  */
+ void enable_irq(unsigned int irq)
+ {
+-      scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
++      scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
+               struct irq_desc *desc = scoped_irqdesc;
+ 
+               if (WARN(!desc->irq_data.chip, "enable_irq before setup/request_irq: irq %u\n", irq))
+diff --git a/kernel/sched/build_policy.c b/kernel/sched/build_policy.c
+index c4a488e67aa7d8..755883faf75186 100644
+--- a/kernel/sched/build_policy.c
++++ b/kernel/sched/build_policy.c
+@@ -58,6 +58,7 @@
+ #include "deadline.c"
+ 
+ #ifdef CONFIG_SCHED_CLASS_EXT
++# include "ext_internal.h"
+ # include "ext.c"
+ # include "ext_idle.c"
+ #endif
+diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
+index 088ceff38c8a47..14724dae0b7951 100644
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -9,1040 +9,6 @@
+ #include <linux/btf_ids.h>
+ #include "ext_idle.h"
+ 
+-#define SCX_OP_IDX(op)                (offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))
+-
+-enum scx_consts {
+-      SCX_DSP_DFL_MAX_BATCH           = 32,
+-      SCX_DSP_MAX_LOOPS               = 32,
+-      SCX_WATCHDOG_MAX_TIMEOUT        = 30 * HZ,
+-
+-      SCX_EXIT_BT_LEN                 = 64,
+-      SCX_EXIT_MSG_LEN                = 1024,
+-      SCX_EXIT_DUMP_DFL_LEN           = 32768,
+-
+-      SCX_CPUPERF_ONE                 = SCHED_CAPACITY_SCALE,
+-
+-      /*
+-       * Iterating all tasks may take a while. Periodically drop
+-       * scx_tasks_lock to avoid causing e.g. CSD and RCU stalls.
+-       */
+-      SCX_TASK_ITER_BATCH             = 32,
+-};
+-
+-enum scx_exit_kind {
+-      SCX_EXIT_NONE,
+-      SCX_EXIT_DONE,
+-
+-      SCX_EXIT_UNREG = 64,    /* user-space initiated unregistration */
+-      SCX_EXIT_UNREG_BPF,     /* BPF-initiated unregistration */
+-      SCX_EXIT_UNREG_KERN,    /* kernel-initiated unregistration */
+-      SCX_EXIT_SYSRQ,         /* requested by 'S' sysrq */
+-
+-      SCX_EXIT_ERROR = 1024,  /* runtime error, error msg contains details */
+-      SCX_EXIT_ERROR_BPF,     /* ERROR but triggered through scx_bpf_error() */
+-      SCX_EXIT_ERROR_STALL,   /* watchdog detected stalled runnable tasks */
+-};
+-
+-/*
+- * An exit code can be specified when exiting with scx_bpf_exit() or scx_exit(),
+- * corresponding to exit_kind UNREG_BPF and UNREG_KERN respectively. The codes
+- * are 64bit of the format:
+- *
+- *   Bits: [63  ..  48 47   ..  32 31 .. 0]
+- *         [ SYS ACT ] [ SYS RSN ] [ USR  ]
+- *
+- *   SYS ACT: System-defined exit actions
+- *   SYS RSN: System-defined exit reasons
+- *   USR    : User-defined exit codes and reasons
+- *
+- * Using the above, users may communicate intention and context by ORing system
+- * actions and/or system reasons with a user-defined exit code.
+- */
+-enum scx_exit_code {
+-      /* Reasons */
+-      SCX_ECODE_RSN_HOTPLUG   = 1LLU << 32,
+-
+-      /* Actions */
+-      SCX_ECODE_ACT_RESTART   = 1LLU << 48,
+-};
+-
+-/*
+- * scx_exit_info is passed to ops.exit() to describe why the BPF scheduler is
+- * being disabled.
+- */
+-struct scx_exit_info {
+-      /* %SCX_EXIT_* - broad category of the exit reason */
+-      enum scx_exit_kind      kind;
+-
+-      /* exit code if gracefully exiting */
+-      s64                     exit_code;
+-
+-      /* textual representation of the above */
+-      const char              *reason;
+-
+-      /* backtrace if exiting due to an error */
+-      unsigned long           *bt;
+-      u32                     bt_len;
+-
+-      /* informational message */
+-      char                    *msg;
+-
+-      /* debug dump */
+-      char                    *dump;
+-};
+-
+-/* sched_ext_ops.flags */
+-enum scx_ops_flags {
+-      /*
+-       * Keep built-in idle tracking even if ops.update_idle() is implemented.
+-       */
+-      SCX_OPS_KEEP_BUILTIN_IDLE       = 1LLU << 0,
+-
+-      /*
+-       * By default, if there are no other task to run on the CPU, ext core
+-       * keeps running the current task even after its slice expires. If this
+-       * flag is specified, such tasks are passed to ops.enqueue() with
+-       * %SCX_ENQ_LAST. See the comment above %SCX_ENQ_LAST for more info.
+-       */
+-      SCX_OPS_ENQ_LAST                = 1LLU << 1,
+-
+-      /*
+-       * An exiting task may schedule after PF_EXITING is set. In such cases,
+-       * bpf_task_from_pid() may not be able to find the task and if the BPF
+-       * scheduler depends on pid lookup for dispatching, the task will be
+-       * lost leading to various issues including RCU grace period stalls.
+-       *
+-       * To mask this problem, by default, unhashed tasks are automatically
+-       * dispatched to the local DSQ on enqueue. If the BPF scheduler doesn't
+-       * depend on pid lookups and wants to handle these tasks directly, the
+-       * following flag can be used.
+-       */
+-      SCX_OPS_ENQ_EXITING             = 1LLU << 2,
+-
+-      /*
+-       * If set, only tasks with policy set to SCHED_EXT are attached to
+-       * sched_ext. If clear, SCHED_NORMAL tasks are also included.
+-       */
+-      SCX_OPS_SWITCH_PARTIAL          = 1LLU << 3,
+-
+-      /*
+-       * A migration disabled task can only execute on its current CPU. By
+-       * default, such tasks are automatically put on the CPU's local DSQ with
+-       * the default slice on enqueue. If this ops flag is set, they also go
+-       * through ops.enqueue().
+-       *
+-       * A migration disabled task never invokes ops.select_cpu() as it can
+-       * only select the current CPU. Also, p->cpus_ptr will only contain its
+-       * current CPU while p->nr_cpus_allowed keeps tracking p->user_cpus_ptr
+-       * and thus may disagree with cpumask_weight(p->cpus_ptr).
+-       */
+-      SCX_OPS_ENQ_MIGRATION_DISABLED  = 1LLU << 4,
+-
+-      /*
+-       * Queued wakeup (ttwu_queue) is a wakeup optimization that invokes
+-       * ops.enqueue() on the ops.select_cpu() selected or the wakee's
+-       * previous CPU via IPI (inter-processor interrupt) to reduce cacheline
+-       * transfers. When this optimization is enabled, ops.select_cpu() is
+-       * skipped in some cases (when racing against the wakee switching out).
+-       * As the BPF scheduler may depend on ops.select_cpu() being invoked
+-       * during wakeups, queued wakeup is disabled by default.
+-       *
+-       * If this ops flag is set, queued wakeup optimization is enabled and
+-       * the BPF scheduler must be able to handle ops.enqueue() invoked on the
+-       * wakee's CPU without preceding ops.select_cpu() even for tasks which
+-       * may be executed on multiple CPUs.
+-       */
+-      SCX_OPS_ALLOW_QUEUED_WAKEUP     = 1LLU << 5,
+-
+-      /*
+-       * If set, enable per-node idle cpumasks. If clear, use a single global
+-       * flat idle cpumask.
+-       */
+-      SCX_OPS_BUILTIN_IDLE_PER_NODE   = 1LLU << 6,
+-
+-      /*
+-       * CPU cgroup support flags
+-       */
+-      SCX_OPS_HAS_CGROUP_WEIGHT       = 1LLU << 16,   /* DEPRECATED, will be removed on 6.18 */
+-
+-      SCX_OPS_ALL_FLAGS               = SCX_OPS_KEEP_BUILTIN_IDLE |
+-                                        SCX_OPS_ENQ_LAST |
+-                                        SCX_OPS_ENQ_EXITING |
+-                                        SCX_OPS_ENQ_MIGRATION_DISABLED |
+-                                        SCX_OPS_ALLOW_QUEUED_WAKEUP |
+-                                        SCX_OPS_SWITCH_PARTIAL |
+-                                        SCX_OPS_BUILTIN_IDLE_PER_NODE |
+-                                        SCX_OPS_HAS_CGROUP_WEIGHT,
+-
+-      /* high 8 bits are internal, don't include in SCX_OPS_ALL_FLAGS */
+-      __SCX_OPS_INTERNAL_MASK         = 0xffLLU << 56,
+-
+-      SCX_OPS_HAS_CPU_PREEMPT         = 1LLU << 56,
+-};
+-
+-/* argument container for ops.init_task() */
+-struct scx_init_task_args {
+-      /*
+-       * Set if ops.init_task() is being invoked on the fork path, as opposed
+-       * to the scheduler transition path.
+-       */
+-      bool                    fork;
+-#ifdef CONFIG_EXT_GROUP_SCHED
+-      /* the cgroup the task is joining */
+-      struct cgroup           *cgroup;
+-#endif
+-};
+-
+-/* argument container for ops.exit_task() */
+-struct scx_exit_task_args {
+-      /* Whether the task exited before running on sched_ext. */
+-      bool cancelled;
+-};
+-
+-/* argument container for ops->cgroup_init() */
+-struct scx_cgroup_init_args {
+-      /* the weight of the cgroup [1..10000] */
+-      u32                     weight;
+-
+-      /* bandwidth control parameters from cpu.max and cpu.max.burst */
+-      u64                     bw_period_us;
+-      u64                     bw_quota_us;
+-      u64                     bw_burst_us;
+-};
+-
+-enum scx_cpu_preempt_reason {
+-      /* next task is being scheduled by &sched_class_rt */
+-      SCX_CPU_PREEMPT_RT,
+-      /* next task is being scheduled by &sched_class_dl */
+-      SCX_CPU_PREEMPT_DL,
+-      /* next task is being scheduled by &sched_class_stop */
+-      SCX_CPU_PREEMPT_STOP,
+-      /* unknown reason for SCX being preempted */
+-      SCX_CPU_PREEMPT_UNKNOWN,
+-};
+-
+-/*
+- * Argument container for ops->cpu_acquire(). Currently empty, but may be
+- * expanded in the future.
+- */
+-struct scx_cpu_acquire_args {};
+-
+-/* argument container for ops->cpu_release() */
+-struct scx_cpu_release_args {
+-      /* the reason the CPU was preempted */
+-      enum scx_cpu_preempt_reason reason;
+-
+-      /* the task that's going to be scheduled on the CPU */
+-      struct task_struct      *task;
+-};
+-
+-/*
+- * Informational context provided to dump operations.
+- */
+-struct scx_dump_ctx {
+-      enum scx_exit_kind      kind;
+-      s64                     exit_code;
+-      const char              *reason;
+-      u64                     at_ns;
+-      u64                     at_jiffies;
+-};
+-
+-/**
+- * struct sched_ext_ops - Operation table for BPF scheduler implementation
+- *
+- * A BPF scheduler can implement an arbitrary scheduling policy by
+- * implementing and loading operations in this table. Note that a userland
+- * scheduling policy can also be implemented using the BPF scheduler
+- * as a shim layer.
+- */
+-struct sched_ext_ops {
+-      /**
+-       * @select_cpu: Pick the target CPU for a task which is being woken up
+-       * @p: task being woken up
+-       * @prev_cpu: the cpu @p was on before sleeping
+-       * @wake_flags: SCX_WAKE_*
+-       *
+-       * Decision made here isn't final. @p may be moved to any CPU while it
+-       * is getting dispatched for execution later. However, as @p is not on
+-       * the rq at this point, getting the eventual execution CPU right here
+-       * saves a small bit of overhead down the line.
+-       *
+-       * If an idle CPU is returned, the CPU is kicked and will try to
+-       * dispatch. While an explicit custom mechanism can be added,
+-       * select_cpu() serves as the default way to wake up idle CPUs.
+-       *
+-       * @p may be inserted into a DSQ directly by calling
+-       * scx_bpf_dsq_insert(). If so, the ops.enqueue() will be skipped.
+-       * Directly inserting into %SCX_DSQ_LOCAL will put @p in the local DSQ
+-       * of the CPU returned by this operation.
+-       *
+-       * Note that select_cpu() is never called for tasks that can only run
+-       * on a single CPU or tasks with migration disabled, as they don't have
+-       * the option to select a different CPU. See select_task_rq() for
+-       * details.
+-       */
+-      s32 (*select_cpu)(struct task_struct *p, s32 prev_cpu, u64 wake_flags);
+-
+-      /**
+-       * @enqueue: Enqueue a task on the BPF scheduler
+-       * @p: task being enqueued
+-       * @enq_flags: %SCX_ENQ_*
+-       *
+-       * @p is ready to run. Insert directly into a DSQ by calling
+-       * scx_bpf_dsq_insert() or enqueue on the BPF scheduler. If not directly
+-       * inserted, the bpf scheduler owns @p and if it fails to dispatch @p,
+-       * the task will stall.
+-       *
+-       * If @p was inserted into a DSQ from ops.select_cpu(), this callback is
+-       * skipped.
+-       */
+-      void (*enqueue)(struct task_struct *p, u64 enq_flags);
+-
+-      /**
+-       * @dequeue: Remove a task from the BPF scheduler
+-       * @p: task being dequeued
+-       * @deq_flags: %SCX_DEQ_*
+-       *
+-       * Remove @p from the BPF scheduler. This is usually called to isolate
+-       * the task while updating its scheduling properties (e.g. priority).
+-       *
+-       * The ext core keeps track of whether the BPF side owns a given task or
+-       * not and can gracefully ignore spurious dispatches from BPF side,
+-       * which makes it safe to not implement this method. However, depending
+-       * on the scheduling logic, this can lead to confusing behaviors - e.g.
+-       * scheduling position not being updated across a priority change.
+-       */
+-      void (*dequeue)(struct task_struct *p, u64 deq_flags);
+-
+-      /**
+-       * @dispatch: Dispatch tasks from the BPF scheduler and/or user DSQs
+-       * @cpu: CPU to dispatch tasks for
+-       * @prev: previous task being switched out
+-       *
+-       * Called when a CPU's local dsq is empty. The operation should dispatch
+-       * one or more tasks from the BPF scheduler into the DSQs using
+-       * scx_bpf_dsq_insert() and/or move from user DSQs into the local DSQ
+-       * using scx_bpf_dsq_move_to_local().
+-       *
+-       * The maximum number of times scx_bpf_dsq_insert() can be called
+-       * without an intervening scx_bpf_dsq_move_to_local() is specified by
+-       * ops.dispatch_max_batch. See the comments on top of the two functions
+-       * for more details.
+-       *
+-       * When not %NULL, @prev is an SCX task with its slice depleted. If
+-       * @prev is still runnable as indicated by set %SCX_TASK_QUEUED in
+-       * @prev->scx.flags, it is not enqueued yet and will be enqueued after
+-       * ops.dispatch() returns. To keep executing @prev, return without
+-       * dispatching or moving any tasks. Also see %SCX_OPS_ENQ_LAST.
+-       */
+-      void (*dispatch)(s32 cpu, struct task_struct *prev);
+-
+-      /**
+-       * @tick: Periodic tick
+-       * @p: task running currently
+-       *
+-       * This operation is called every 1/HZ seconds on CPUs which are
+-       * executing an SCX task. Setting @p->scx.slice to 0 will trigger an
+-       * immediate dispatch cycle on the CPU.
+-       */
+-      void (*tick)(struct task_struct *p);
+-
+-      /**
+-       * @runnable: A task is becoming runnable on its associated CPU
+-       * @p: task becoming runnable
+-       * @enq_flags: %SCX_ENQ_*
+-       *
+-       * This and the following three functions can be used to track a task's
+-       * execution state transitions. A task becomes ->runnable() on a CPU,
+-       * and then goes through one or more ->running() and ->stopping() pairs
+-       * as it runs on the CPU, and eventually becomes ->quiescent() when it's
+-       * done running on the CPU.
+-       *
+-       * @p is becoming runnable on the CPU because it's
+-       *
+-       * - waking up (%SCX_ENQ_WAKEUP)
+-       * - being moved from another CPU
+-       * - being restored after temporarily taken off the queue for an
+-       *   attribute change.
+-       *
+-       * This and ->enqueue() are related but not coupled. This operation
+-       * notifies @p's state transition and may not be followed by ->enqueue()
+-       * e.g. when @p is being dispatched to a remote CPU, or when @p is
+-       * being enqueued on a CPU experiencing a hotplug event. Likewise, a
+-       * task may be ->enqueue()'d without being preceded by this operation
+-       * e.g. after exhausting its slice.
+-       */
+-      void (*runnable)(struct task_struct *p, u64 enq_flags);
+-
+-      /**
+-       * @running: A task is starting to run on its associated CPU
+-       * @p: task starting to run
+-       *
+-       * Note that this callback may be called from a CPU other than the
+-       * one the task is going to run on. This can happen when a task
+-       * property is changed (i.e., affinity), since scx_next_task_scx(),
+-       * which triggers this callback, may run on a CPU different from
+-       * the task's assigned CPU.
+-       *
+-       * Therefore, always use scx_bpf_task_cpu(@p) to determine the
+-       * target CPU the task is going to use.
+-       *
+-       * See ->runnable() for explanation on the task state notifiers.
+-       */
+-      void (*running)(struct task_struct *p);
+-
+-      /**
+-       * @stopping: A task is stopping execution
+-       * @p: task stopping to run
+-       * @runnable: is task @p still runnable?
+-       *
+-       * Note that this callback may be called from a CPU other than the
+-       * one the task was running on. This can happen when a task
+-       * property is changed (i.e., affinity), since dequeue_task_scx(),
+-       * which triggers this callback, may run on a CPU different from
+-       * the task's assigned CPU.
+-       *
+-       * Therefore, always use scx_bpf_task_cpu(@p) to retrieve the CPU
+-       * the task was running on.
+-       *
+-       * See ->runnable() for explanation on the task state notifiers. If
+-       * !@runnable, ->quiescent() will be invoked after this operation
+-       * returns.
+-       */
+-      void (*stopping)(struct task_struct *p, bool runnable);
+-
+-      /**
+-       * @quiescent: A task is becoming not runnable on its associated CPU
+-       * @p: task becoming not runnable
+-       * @deq_flags: %SCX_DEQ_*
+-       *
+-       * See ->runnable() for explanation on the task state notifiers.
+-       *
+-       * @p is becoming quiescent on the CPU because it's
+-       *
+-       * - sleeping (%SCX_DEQ_SLEEP)
+-       * - being moved to another CPU
+-       * - being temporarily taken off the queue for an attribute change
+-       *   (%SCX_DEQ_SAVE)
+-       *
+-       * This and ->dequeue() are related but not coupled. This operation
+-       * notifies @p's state transition and may not be preceded by ->dequeue()
+-       * e.g. when @p is being dispatched to a remote CPU.
+-       */
+-      void (*quiescent)(struct task_struct *p, u64 deq_flags);
+-
+-      /**
+-       * @yield: Yield CPU
+-       * @from: yielding task
+-       * @to: optional yield target task
+-       *
+-       * If @to is NULL, @from is yielding the CPU to other runnable tasks.
+-       * The BPF scheduler should ensure that other available tasks are
+-       * dispatched before the yielding task. Return value is ignored in this
+-       * case.
+-       *
+-       * If @to is not-NULL, @from wants to yield the CPU to @to. If the bpf
+-       * scheduler can implement the request, return %true; otherwise, %false.
+-       */
+-      bool (*yield)(struct task_struct *from, struct task_struct *to);
+-
+-      /**
+-       * @core_sched_before: Task ordering for core-sched
+-       * @a: task A
+-       * @b: task B
+-       *
+-       * Used by core-sched to determine the ordering between two tasks. See
+-       * Documentation/admin-guide/hw-vuln/core-scheduling.rst for details on
+-       * core-sched.
+-       *
+-       * Both @a and @b are runnable and may or may not currently be queued on
+-       * the BPF scheduler. Should return %true if @a should run before @b.
+-       * %false if there's no required ordering or @b should run before @a.
+-       *
+-       * If not specified, the default is ordering them according to when they
+-       * became runnable.
+-       */
+-      bool (*core_sched_before)(struct task_struct *a, struct task_struct *b);
+-
+-      /**
+-       * @set_weight: Set task weight
+-       * @p: task to set weight for
+-       * @weight: new weight [1..10000]
+-       *
+-       * Update @p's weight to @weight.
+-       */
+-      void (*set_weight)(struct task_struct *p, u32 weight);
+-
+-      /**
+-       * @set_cpumask: Set CPU affinity
+-       * @p: task to set CPU affinity for
+-       * @cpumask: cpumask of cpus that @p can run on
+-       *
+-       * Update @p's CPU affinity to @cpumask.
+-       */
+-      void (*set_cpumask)(struct task_struct *p,
+-                          const struct cpumask *cpumask);
+-
+-      /**
+-       * @update_idle: Update the idle state of a CPU
+-       * @cpu: CPU to update the idle state for
+-       * @idle: whether entering or exiting the idle state
+-       *
+-       * This operation is called when @rq's CPU goes or leaves the idle
+-       * state. By default, implementing this operation disables the built-in
+-       * idle CPU tracking and the following helpers become unavailable:
+-       *
+-       * - scx_bpf_select_cpu_dfl()
+-       * - scx_bpf_select_cpu_and()
+-       * - scx_bpf_test_and_clear_cpu_idle()
+-       * - scx_bpf_pick_idle_cpu()
+-       *
+-       * The user also must implement ops.select_cpu() as the default
+-       * implementation relies on scx_bpf_select_cpu_dfl().
+-       *
+-       * Specify the %SCX_OPS_KEEP_BUILTIN_IDLE flag to keep the built-in idle
+-       * tracking.
+-       */
+-      void (*update_idle)(s32 cpu, bool idle);
+-
+-      /**
+-       * @cpu_acquire: A CPU is becoming available to the BPF scheduler
+-       * @cpu: The CPU being acquired by the BPF scheduler.
+-       * @args: Acquire arguments, see the struct definition.
+-       *
+-       * A CPU that was previously released from the BPF scheduler is now once
+-       * again under its control.
+-       */
+-      void (*cpu_acquire)(s32 cpu, struct scx_cpu_acquire_args *args);
+-
+-      /**
+-       * @cpu_release: A CPU is taken away from the BPF scheduler
+-       * @cpu: The CPU being released by the BPF scheduler.
+-       * @args: Release arguments, see the struct definition.
+-       *
+-       * The specified CPU is no longer under the control of the BPF
+-       * scheduler. This could be because it was preempted by a higher
+-       * priority sched_class, though there may be other reasons as well. The
+-       * caller should consult @args->reason to determine the cause.
+-       */
+-      void (*cpu_release)(s32 cpu, struct scx_cpu_release_args *args);
+-
+-      /**
+-       * @init_task: Initialize a task to run in a BPF scheduler
+-       * @p: task to initialize for BPF scheduling
+-       * @args: init arguments, see the struct definition
+-       *
+-       * Either we're loading a BPF scheduler or a new task is being forked.
+-       * Initialize @p for BPF scheduling. This operation may block and can
+-       * be used for allocations, and is called exactly once for a task.
+-       *
+-       * Return 0 for success, -errno for failure. An error return while
+-       * loading will abort loading of the BPF scheduler. During a fork, it
+-       * will abort that specific fork.
+-       */
+-      s32 (*init_task)(struct task_struct *p, struct scx_init_task_args *args);
+-
+-      /**
+-       * @exit_task: Exit a previously-running task from the system
+-       * @p: task to exit
+-       * @args: exit arguments, see the struct definition
+-       *
+-       * @p is exiting or the BPF scheduler is being unloaded. Perform any
+-       * necessary cleanup for @p.
+-       */
+-      void (*exit_task)(struct task_struct *p, struct scx_exit_task_args *args);
+-
+-      /**
+-       * @enable: Enable BPF scheduling for a task
+-       * @p: task to enable BPF scheduling for
+-       *
+-       * Enable @p for BPF scheduling. enable() is called on @p any time it
+-       * enters SCX, and is always paired with a matching disable().
+-       */
+-      void (*enable)(struct task_struct *p);
+-
+-      /**
+-       * @disable: Disable BPF scheduling for a task
+-       * @p: task to disable BPF scheduling for
+-       *
+-       * @p is exiting, leaving SCX or the BPF scheduler is being unloaded.
+-       * Disable BPF scheduling for @p. A disable() call is always matched
+-       * with a prior enable() call.
+-       */
+-      void (*disable)(struct task_struct *p);
+-
+-      /**
+-       * @dump: Dump BPF scheduler state on error
+-       * @ctx: debug dump context
+-       *
+-       * Use scx_bpf_dump() to generate BPF scheduler specific debug dump.
+-       */
+-      void (*dump)(struct scx_dump_ctx *ctx);
+-
+-      /**
+-       * @dump_cpu: Dump BPF scheduler state for a CPU on error
+-       * @ctx: debug dump context
+-       * @cpu: CPU to generate debug dump for
+-       * @idle: @cpu is currently idle without any runnable tasks
+-       *
+-       * Use scx_bpf_dump() to generate BPF scheduler specific debug dump for
+-       * @cpu. If @idle is %true and this operation doesn't produce any
+-       * output, @cpu is skipped for dump.
+-       */
+-      void (*dump_cpu)(struct scx_dump_ctx *ctx, s32 cpu, bool idle);
+-
+-      /**
+-       * @dump_task: Dump BPF scheduler state for a runnable task on error
+-       * @ctx: debug dump context
+-       * @p: runnable task to generate debug dump for
+-       *
+-       * Use scx_bpf_dump() to generate BPF scheduler specific debug dump for
+-       * @p.
+-       */
+-      void (*dump_task)(struct scx_dump_ctx *ctx, struct task_struct *p);
+-
+-#ifdef CONFIG_EXT_GROUP_SCHED
+-      /**
+-       * @cgroup_init: Initialize a cgroup
+-       * @cgrp: cgroup being initialized
+-       * @args: init arguments, see the struct definition
+-       *
+-       * Either the BPF scheduler is being loaded or @cgrp created, initialize
+-       * @cgrp for sched_ext. This operation may block.
+-       *
+-       * Return 0 for success, -errno for failure. An error return while
+-       * loading will abort loading of the BPF scheduler. During cgroup
+-       * creation, it will abort the specific cgroup creation.
+-       */
+-      s32 (*cgroup_init)(struct cgroup *cgrp,
+-                         struct scx_cgroup_init_args *args);
+-
+-      /**
+-       * @cgroup_exit: Exit a cgroup
+-       * @cgrp: cgroup being exited
+-       *
+-       * Either the BPF scheduler is being unloaded or @cgrp destroyed, exit
+-       * @cgrp for sched_ext. This operation my block.
+-       */
+-      void (*cgroup_exit)(struct cgroup *cgrp);
+-
+-      /**
+-       * @cgroup_prep_move: Prepare a task to be moved to a different cgroup
+-       * @p: task being moved
+-       * @from: cgroup @p is being moved from
+-       * @to: cgroup @p is being moved to
+-       *
+-       * Prepare @p for move from cgroup @from to @to. This operation may
+-       * block and can be used for allocations.
+-       *
+-       * Return 0 for success, -errno for failure. An error return aborts the
+-       * migration.
+-       */
+-      s32 (*cgroup_prep_move)(struct task_struct *p,
+-                              struct cgroup *from, struct cgroup *to);
+-
+-      /**
+-       * @cgroup_move: Commit cgroup move
+-       * @p: task being moved
+-       * @from: cgroup @p is being moved from
+-       * @to: cgroup @p is being moved to
+-       *
+-       * Commit the move. @p is dequeued during this operation.
+-       */
+-      void (*cgroup_move)(struct task_struct *p,
+-                          struct cgroup *from, struct cgroup *to);
+-
+-      /**
+-       * @cgroup_cancel_move: Cancel cgroup move
+-       * @p: task whose cgroup move is being canceled
+-       * @from: cgroup @p was being moved from
+-       * @to: cgroup @p was being moved to
+-       *
+-       * @p was cgroup_prep_move()'d but failed before reaching cgroup_move().
+-       * Undo the preparation.
+-       */
+-      void (*cgroup_cancel_move)(struct task_struct *p,
+-                                 struct cgroup *from, struct cgroup *to);
+-
+-      /**
+-       * @cgroup_set_weight: A cgroup's weight is being changed
+-       * @cgrp: cgroup whose weight is being updated
+-       * @weight: new weight [1..10000]
+-       *
+-       * Update @cgrp's weight to @weight.
+-       */
+-      void (*cgroup_set_weight)(struct cgroup *cgrp, u32 weight);
+-
+-      /**
+-       * @cgroup_set_bandwidth: A cgroup's bandwidth is being changed
+-       * @cgrp: cgroup whose bandwidth is being updated
+-       * @period_us: bandwidth control period
+-       * @quota_us: bandwidth control quota
+-       * @burst_us: bandwidth control burst
+-       *
+-       * Update @cgrp's bandwidth control parameters. This is from the cpu.max
+-       * cgroup interface.
+-       *
+-       * @quota_us / @period_us determines the CPU bandwidth @cgrp is entitled
+-       * to. For example, if @period_us is 1_000_000 and @quota_us is
+-       * 2_500_000. @cgrp is entitled to 2.5 CPUs. @burst_us can be
+-       * interpreted in the same fashion and specifies how much @cgrp can
+-       * burst temporarily. The specific control mechanism and thus the
+-       * interpretation of @period_us and burstiness is upto to the BPF
+-       * scheduler.
+-       */
+-      void (*cgroup_set_bandwidth)(struct cgroup *cgrp,
+-                                   u64 period_us, u64 quota_us, u64 burst_us);
+-
+-#endif        /* CONFIG_EXT_GROUP_SCHED */
+-
+-      /*
+-       * All online ops must come before ops.cpu_online().
+-       */
+-
+-      /**
+-       * @cpu_online: A CPU became online
+-       * @cpu: CPU which just came up
+-       *
+-       * @cpu just came online. @cpu will not call ops.enqueue() or
+-       * ops.dispatch(), nor run tasks associated with other CPUs beforehand.
+-       */
+-      void (*cpu_online)(s32 cpu);
+-
+-      /**
+-       * @cpu_offline: A CPU is going offline
+-       * @cpu: CPU which is going offline
+-       *
+-       * @cpu is going offline. @cpu will not call ops.enqueue() or
+-       * ops.dispatch(), nor run tasks associated with other CPUs afterwards.
+-       */
+-      void (*cpu_offline)(s32 cpu);
+-
+-      /*
+-       * All CPU hotplug ops must come before ops.init().
+-       */
+-
+-      /**
+-       * @init: Initialize the BPF scheduler
+-       */
+-      s32 (*init)(void);
+-
+-      /**
+-       * @exit: Clean up after the BPF scheduler
+-       * @info: Exit info
+-       *
+-       * ops.exit() is also called on ops.init() failure, which is a bit
+-       * unusual. This is to allow rich reporting through @info on how
+-       * ops.init() failed.
+-       */
+-      void (*exit)(struct scx_exit_info *info);
+-
+-      /**
+-       * @dispatch_max_batch: Max nr of tasks that dispatch() can dispatch
+-       */
+-      u32 dispatch_max_batch;
+-
+-      /**
+-       * @flags: %SCX_OPS_* flags
+-       */
+-      u64 flags;
+-
+-      /**
+-       * @timeout_ms: The maximum amount of time, in milliseconds, that a
+-       * runnable task should be able to wait before being scheduled. The
+-       * maximum timeout may not exceed the default timeout of 30 seconds.
+-       *
+-       * Defaults to the maximum allowed timeout value of 30 seconds.
+-       */
+-      u32 timeout_ms;
+-
+-      /**
+-       * @exit_dump_len: scx_exit_info.dump buffer length. If 0, the default
+-       * value of 32768 is used.
+-       */
+-      u32 exit_dump_len;
+-
+-      /**
+-       * @hotplug_seq: A sequence number that may be set by the scheduler to
+-       * detect when a hotplug event has occurred during the loading process.
+-       * If 0, no detection occurs. Otherwise, the scheduler will fail to
+-       * load if the sequence number does not match @scx_hotplug_seq on the
+-       * enable path.
+-       */
+-      u64 hotplug_seq;
+-
+-      /**
+-       * @name: BPF scheduler's name
+-       *
+-       * Must be a non-zero valid BPF object name including only isalnum(),
+-       * '_' and '.' chars. Shows up in kernel.sched_ext_ops sysctl while the
+-       * BPF scheduler is enabled.
+-       */
+-      char name[SCX_OPS_NAME_LEN];
+-
+-      /* internal use only, must be NULL */
+-      void *priv;
+-};
+-
+-enum scx_opi {
+-      SCX_OPI_BEGIN                   = 0,
+-      SCX_OPI_NORMAL_BEGIN            = 0,
+-      SCX_OPI_NORMAL_END              = SCX_OP_IDX(cpu_online),
+-      SCX_OPI_CPU_HOTPLUG_BEGIN       = SCX_OP_IDX(cpu_online),
+-      SCX_OPI_CPU_HOTPLUG_END         = SCX_OP_IDX(init),
+-      SCX_OPI_END                     = SCX_OP_IDX(init),
+-};
+-
+-/*
+- * Collection of event counters. Event types are placed in descending order.
+- */
+-struct scx_event_stats {
+-      /*
+-       * If ops.select_cpu() returns a CPU which can't be used by the task,
+-       * the core scheduler code silently picks a fallback CPU.
+-       */
+-      s64             SCX_EV_SELECT_CPU_FALLBACK;
+-
+-      /*
+-       * When dispatching to a local DSQ, the CPU may have gone offline in
+-       * the meantime. In this case, the task is bounced to the global DSQ.
+-       */
+-      s64             SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE;
+-
+-      /*
+-       * If SCX_OPS_ENQ_LAST is not set, the number of times that a task
+-       * continued to run because there were no other tasks on the CPU.
+-       */
+-      s64             SCX_EV_DISPATCH_KEEP_LAST;
+-
+-      /*
+-       * If SCX_OPS_ENQ_EXITING is not set, the number of times that a task
+-       * is dispatched to a local DSQ when exiting.
+-       */
+-      s64             SCX_EV_ENQ_SKIP_EXITING;
+-
+-      /*
+-       * If SCX_OPS_ENQ_MIGRATION_DISABLED is not set, the number of times a
+-       * migration disabled task skips ops.enqueue() and is dispatched to its
+-       * local DSQ.
+-       */
+-      s64             SCX_EV_ENQ_SKIP_MIGRATION_DISABLED;
+-
+-      /*
+-       * Total number of times a task's time slice was refilled with the
+-       * default value (SCX_SLICE_DFL).
+-       */
+-      s64             SCX_EV_REFILL_SLICE_DFL;
+-
+-      /*
+-       * The total duration of bypass modes in nanoseconds.
+-       */
+-      s64             SCX_EV_BYPASS_DURATION;
+-
+-      /*
+-       * The number of tasks dispatched in the bypassing mode.
+-       */
+-      s64             SCX_EV_BYPASS_DISPATCH;
+-
+-      /*
+-       * The number of times the bypassing mode has been activated.
+-       */
+-      s64             SCX_EV_BYPASS_ACTIVATE;
+-};
+-
+-struct scx_sched {
+-      struct sched_ext_ops    ops;
+-      DECLARE_BITMAP(has_op, SCX_OPI_END);
+-
+-      /*
+-       * Dispatch queues.
+-       *
+-       * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability.
+-       * This is to avoid live-locking in bypass mode where all tasks are
+-       * dispatched to %SCX_DSQ_GLOBAL and all CPUs consume from it. If
+-       * per-node split isn't sufficient, it can be further split.
+-       */
+-      struct rhashtable       dsq_hash;
+-      struct scx_dispatch_q   **global_dsqs;
+-
+-      /*
+-       * The event counters are in a per-CPU variable to minimize the
+-       * accounting overhead. A system-wide view on the event counter is
+-       * constructed when requested by scx_bpf_events().
+-       */
+-      struct scx_event_stats __percpu *event_stats_cpu;
+-
+-      bool                    warned_zero_slice;
+-
+-      atomic_t                exit_kind;
+-      struct scx_exit_info    *exit_info;
+-
+-      struct kobject          kobj;
+-
+-      struct kthread_worker   *helper;
+-      struct irq_work         error_irq_work;
+-      struct kthread_work     disable_work;
+-      struct rcu_work         rcu_work;
+-};
+-
+-enum scx_wake_flags {
+-      /* expose select WF_* flags as enums */
+-      SCX_WAKE_FORK           = WF_FORK,
+-      SCX_WAKE_TTWU           = WF_TTWU,
+-      SCX_WAKE_SYNC           = WF_SYNC,
+-};
+-
+-enum scx_enq_flags {
+-      /* expose select ENQUEUE_* flags as enums */
+-      SCX_ENQ_WAKEUP          = ENQUEUE_WAKEUP,
+-      SCX_ENQ_HEAD            = ENQUEUE_HEAD,
+-      SCX_ENQ_CPU_SELECTED    = ENQUEUE_RQ_SELECTED,
+-
+-      /* high 32bits are SCX specific */
+-
+-      /*
+-       * Set the following to trigger preemption when calling
+-       * scx_bpf_dsq_insert() with a local dsq as the target. The slice of the
+-       * current task is cleared to zero and the CPU is kicked into the
+-       * scheduling path. Implies %SCX_ENQ_HEAD.
+-       */
+-      SCX_ENQ_PREEMPT         = 1LLU << 32,
+-
+-      /*
+-       * The task being enqueued was previously enqueued on the current CPU's
+-       * %SCX_DSQ_LOCAL, but was removed from it in a call to the
+-       * scx_bpf_reenqueue_local() kfunc. If scx_bpf_reenqueue_local() was
+-       * invoked in a ->cpu_release() callback, and the task is again
+-       * dispatched back to %SCX_LOCAL_DSQ by this current ->enqueue(), the
+-       * task will not be scheduled on the CPU until at least the next invocation
+-       * of the ->cpu_acquire() callback.
+-       */
+-      SCX_ENQ_REENQ           = 1LLU << 40,
+-
+-      /*
+-       * The task being enqueued is the only task available for the cpu. By
+-       * default, ext core keeps executing such tasks but when
+-       * %SCX_OPS_ENQ_LAST is specified, they're ops.enqueue()'d with the
+-       * %SCX_ENQ_LAST flag set.
+-       *
+-       * The BPF scheduler is responsible for triggering a follow-up
+-       * scheduling event. Otherwise, Execution may stall.
+-       */
+-      SCX_ENQ_LAST            = 1LLU << 41,
+-
+-      /* high 8 bits are internal */
+-      __SCX_ENQ_INTERNAL_MASK = 0xffLLU << 56,
+-
+-      SCX_ENQ_CLEAR_OPSS      = 1LLU << 56,
+-      SCX_ENQ_DSQ_PRIQ        = 1LLU << 57,
+-};
+-
+-enum scx_deq_flags {
+-      /* expose select DEQUEUE_* flags as enums */
+-      SCX_DEQ_SLEEP           = DEQUEUE_SLEEP,
+-
+-      /* high 32bits are SCX specific */
+-
+-      /*
+-       * The generic core-sched layer decided to execute the task even though
+-       * it hasn't been dispatched yet. Dequeue from the BPF side.
+-       */
+-      SCX_DEQ_CORE_SCHED_EXEC = 1LLU << 32,
+-};
+-
+-enum scx_pick_idle_cpu_flags {
+-      SCX_PICK_IDLE_CORE      = 1LLU << 0,    /* pick a CPU whose SMT siblings are also idle */
+-      SCX_PICK_IDLE_IN_NODE   = 1LLU << 1,    /* pick a CPU in the same target NUMA node */
+-};
+-
+-enum scx_kick_flags {
+-      /*
+-       * Kick the target CPU if idle. Guarantees that the target CPU goes
+-       * through at least one full scheduling cycle before going idle. If the
+-       * target CPU can be determined to be currently not idle and going to go
+-       * through a scheduling cycle before going idle, noop.
+-       */
+-      SCX_KICK_IDLE           = 1LLU << 0,
+-
+-      /*
+-       * Preempt the current task and execute the dispatch path. If the
+-       * current task of the target CPU is an SCX task, its ->scx.slice is
+-       * cleared to zero before the scheduling path is invoked so that the
+-       * task expires and the dispatch path is invoked.
+-       */
+-      SCX_KICK_PREEMPT        = 1LLU << 1,
+-
+-      /*
+-       * Wait for the CPU to be rescheduled. The scx_bpf_kick_cpu() call will
+-       * return after the target CPU finishes picking the next task.
+-       */
+-      SCX_KICK_WAIT           = 1LLU << 2,
+-};
+-
+-enum scx_tg_flags {
+-      SCX_TG_ONLINE           = 1U << 0,
+-      SCX_TG_INITED           = 1U << 1,
+-};
+-
+-enum scx_enable_state {
+-      SCX_ENABLING,
+-      SCX_ENABLED,
+-      SCX_DISABLING,
+-      SCX_DISABLED,
+-};
+-
+-static const char *scx_enable_state_str[] = {
+-      [SCX_ENABLING]          = "enabling",
+-      [SCX_ENABLED]           = "enabled",
+-      [SCX_DISABLING]         = "disabling",
+-      [SCX_DISABLED]          = "disabled",
+-};
+-
+-/*
+- * sched_ext_entity->ops_state
+- *
+- * Used to track the task ownership between the SCX core and the BPF scheduler.
+- * State transitions look as follows:
+- *
+- * NONE -> QUEUEING -> QUEUED -> DISPATCHING
+- *   ^              |                 |
+- *   |              v                 v
+- *   \-------------------------------/
+- *
+- * QUEUEING and DISPATCHING states can be waited upon. See wait_ops_state() call
+- * sites for explanations on the conditions being waited upon and why they are
+- * safe. Transitions out of them into NONE or QUEUED must store_release and the
+- * waiters should load_acquire.
+- *
+- * Tracking scx_ops_state enables sched_ext core to reliably determine whether
+- * any given task can be dispatched by the BPF scheduler at all times and thus
+- * relaxes the requirements on the BPF scheduler. This allows the BPF scheduler
+- * to try to dispatch any task anytime regardless of its state as the SCX core
+- * can safely reject invalid dispatches.
+- */
+-enum scx_ops_state {
+-      SCX_OPSS_NONE,          /* owned by the SCX core */
+-      SCX_OPSS_QUEUEING,      /* in transit to the BPF scheduler */
+-      SCX_OPSS_QUEUED,        /* owned by the BPF scheduler */
+-      SCX_OPSS_DISPATCHING,   /* in transit back to the SCX core */
+-
+-      /*
+-       * QSEQ brands each QUEUED instance so that, when dispatch races
+-       * dequeue/requeue, the dispatcher can tell whether it still has a claim
+-       * on the task being dispatched.
+-       *
+-       * As some 32bit archs can't do 64bit store_release/load_acquire,
+-       * p->scx.ops_state is atomic_long_t which leaves 30 bits for QSEQ on
+-       * 32bit machines. The dispatch race window QSEQ protects is very narrow
+-       * and runs with IRQ disabled. 30 bits should be sufficient.
+-       */
+-      SCX_OPSS_QSEQ_SHIFT     = 2,
+-};
+-
+-/* Use macros to ensure that the type is unsigned long for the masks */
+-#define SCX_OPSS_STATE_MASK   ((1LU << SCX_OPSS_QSEQ_SHIFT) - 1)
+-#define SCX_OPSS_QSEQ_MASK    (~SCX_OPSS_STATE_MASK)
+-
+ /*
+  * NOTE: sched_ext is in the process of growing multiple scheduler support and
+  * scx_root usage is in a transitional state. Naked dereferences are safe if the
+@@ -1664,7 +630,7 @@ static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
+  * This can be used when preemption is not disabled.
+  */
+ #define scx_add_event(sch, name, cnt) do {                                    \
+-      this_cpu_add((sch)->event_stats_cpu->name, (cnt));                      \
++      this_cpu_add((sch)->pcpu->event_stats.name, (cnt));                     \
+       trace_sched_ext_event(#name, (cnt));                                    \
+ } while(0)
+ 
+@@ -1677,7 +643,7 @@ static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
+  * This should be used only when preemption is disabled.
+  */
+ #define __scx_add_event(sch, name, cnt) do {                                  \
+-      __this_cpu_add((sch)->event_stats_cpu->name, (cnt));                    \
++      __this_cpu_add((sch)->pcpu->event_stats.name, (cnt));                   \
+       trace_sched_ext_event(#name, cnt);                                      \
+ } while(0)
+ 
+@@ -4571,8 +3537,10 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
+       struct scx_dispatch_q *dsq;
+       int node;
+ 
++      irq_work_sync(&sch->error_irq_work);
+       kthread_stop(sch->helper->task);
+-      free_percpu(sch->event_stats_cpu);
++
++      free_percpu(sch->pcpu);
+ 
+       for_each_node_state(node, N_POSSIBLE)
+               kfree(sch->global_dsqs[node]);
+@@ -5473,13 +4441,13 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
+               sch->global_dsqs[node] = dsq;
+       }
+ 
+-      sch->event_stats_cpu = alloc_percpu(struct scx_event_stats);
+-      if (!sch->event_stats_cpu)
++      sch->pcpu = alloc_percpu(struct scx_sched_pcpu);
++      if (!sch->pcpu)
+               goto err_free_gdsqs;
+ 
+       sch->helper = kthread_run_worker(0, "sched_ext_helper");
+       if (!sch->helper)
+-              goto err_free_event_stats;
++              goto err_free_pcpu;
+       sched_set_fifo(sch->helper->task);
+ 
+       atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
+@@ -5497,8 +4465,8 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
+ 
+ err_stop_helper:
+       kthread_stop(sch->helper->task);
+-err_free_event_stats:
+-      free_percpu(sch->event_stats_cpu);
++err_free_pcpu:
++      free_percpu(sch->pcpu);
+ err_free_gdsqs:
+       for_each_node_state(node, N_POSSIBLE)
+               kfree(sch->global_dsqs[node]);
+@@ -5795,7 +4763,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
+ err_disable_unlock_all:
+       scx_cgroup_unlock();
+       percpu_up_write(&scx_fork_rwsem);
+-      scx_bypass(false);
++      /* we'll soon enter disable path, keep bypass on */
+ err_disable:
+       mutex_unlock(&scx_enable_mutex);
+       /*
+@@ -7524,7 +6492,7 @@ static void scx_read_events(struct scx_sched *sch, struct scx_event_stats *event
+       /* Aggregate per-CPU event counters into @events. */
+       memset(events, 0, sizeof(*events));
+       for_each_possible_cpu(cpu) {
+-              e_cpu = per_cpu_ptr(sch->event_stats_cpu, cpu);
++              e_cpu = &per_cpu_ptr(sch->pcpu, cpu)->event_stats;
+               scx_agg_event(events, e_cpu, SCX_EV_SELECT_CPU_FALLBACK);
+               scx_agg_event(events, e_cpu, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
+               scx_agg_event(events, e_cpu, SCX_EV_DISPATCH_KEEP_LAST);
+diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
+index 292bb41a242ec1..33858607bc97f5 100644
+--- a/kernel/sched/ext.h
++++ b/kernel/sched/ext.h
+@@ -8,29 +8,6 @@
+  */
+ #ifdef CONFIG_SCHED_CLASS_EXT
+ 
+-static inline bool scx_kf_allowed_if_unlocked(void)
+-{
+-      return !current->scx.kf_mask;
+-}
+-
+-static inline bool scx_rq_bypassing(struct rq *rq)
+-{
+-      return unlikely(rq->scx.flags & SCX_RQ_BYPASSING);
+-}
+-
+-DECLARE_STATIC_KEY_FALSE(scx_ops_allow_queued_wakeup);
+-
+-DECLARE_PER_CPU(struct rq *, scx_locked_rq_state);
+-
+-/*
+- * Return the rq currently locked from an scx callback, or NULL if no rq is
+- * locked.
+- */
+-static inline struct rq *scx_locked_rq(void)
+-{
+-      return __this_cpu_read(scx_locked_rq_state);
+-}
+-
+ void scx_tick(struct rq *rq);
+ void init_scx_entity(struct sched_ext_entity *scx);
+ void scx_pre_fork(struct task_struct *p);
+diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
+new file mode 100644
+index 00000000000000..af4c054fb6f852
+--- /dev/null
++++ b/kernel/sched/ext_internal.h
+@@ -0,0 +1,1064 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst
++ *
++ * Copyright (c) 2025 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2025 Tejun Heo <[email protected]>
++ */
++#define SCX_OP_IDX(op)                (offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))
++
++enum scx_consts {
++      SCX_DSP_DFL_MAX_BATCH           = 32,
++      SCX_DSP_MAX_LOOPS               = 32,
++      SCX_WATCHDOG_MAX_TIMEOUT        = 30 * HZ,
++
++      SCX_EXIT_BT_LEN                 = 64,
++      SCX_EXIT_MSG_LEN                = 1024,
++      SCX_EXIT_DUMP_DFL_LEN           = 32768,
++
++      SCX_CPUPERF_ONE                 = SCHED_CAPACITY_SCALE,
++
++      /*
++       * Iterating all tasks may take a while. Periodically drop
++       * scx_tasks_lock to avoid causing e.g. CSD and RCU stalls.
++       */
++      SCX_TASK_ITER_BATCH             = 32,
++};
++
++enum scx_exit_kind {
++      SCX_EXIT_NONE,
++      SCX_EXIT_DONE,
++
++      SCX_EXIT_UNREG = 64,    /* user-space initiated unregistration */
++      SCX_EXIT_UNREG_BPF,     /* BPF-initiated unregistration */
++      SCX_EXIT_UNREG_KERN,    /* kernel-initiated unregistration */
++      SCX_EXIT_SYSRQ,         /* requested by 'S' sysrq */
++
++      SCX_EXIT_ERROR = 1024,  /* runtime error, error msg contains details */
++      SCX_EXIT_ERROR_BPF,     /* ERROR but triggered through scx_bpf_error() */
++      SCX_EXIT_ERROR_STALL,   /* watchdog detected stalled runnable tasks */
++};
++
++/*
++ * An exit code can be specified when exiting with scx_bpf_exit() or scx_exit(),
++ * corresponding to exit_kind UNREG_BPF and UNREG_KERN respectively. The codes
++ * are 64bit of the format:
++ *
++ *   Bits: [63  ..  48 47   ..  32 31 .. 0]
++ *         [ SYS ACT ] [ SYS RSN ] [ USR  ]
++ *
++ *   SYS ACT: System-defined exit actions
++ *   SYS RSN: System-defined exit reasons
++ *   USR    : User-defined exit codes and reasons
++ *
++ * Using the above, users may communicate intention and context by ORing system
++ * actions and/or system reasons with a user-defined exit code.
++ */
++enum scx_exit_code {
++      /* Reasons */
++      SCX_ECODE_RSN_HOTPLUG   = 1LLU << 32,
++
++      /* Actions */
++      SCX_ECODE_ACT_RESTART   = 1LLU << 48,
++};
++
++/*
++ * scx_exit_info is passed to ops.exit() to describe why the BPF scheduler is
++ * being disabled.
++ */
++struct scx_exit_info {
++      /* %SCX_EXIT_* - broad category of the exit reason */
++      enum scx_exit_kind      kind;
++
++      /* exit code if gracefully exiting */
++      s64                     exit_code;
++
++      /* textual representation of the above */
++      const char              *reason;
++
++      /* backtrace if exiting due to an error */
++      unsigned long           *bt;
++      u32                     bt_len;
++
++      /* informational message */
++      char                    *msg;
++
++      /* debug dump */
++      char                    *dump;
++};
++
++/* sched_ext_ops.flags */
++enum scx_ops_flags {
++      /*
++       * Keep built-in idle tracking even if ops.update_idle() is implemented.
++       */
++      SCX_OPS_KEEP_BUILTIN_IDLE       = 1LLU << 0,
++
++      /*
++       * By default, if there are no other task to run on the CPU, ext core
++       * keeps running the current task even after its slice expires. If this
++       * flag is specified, such tasks are passed to ops.enqueue() with
++       * %SCX_ENQ_LAST. See the comment above %SCX_ENQ_LAST for more info.
++       */
++      SCX_OPS_ENQ_LAST                = 1LLU << 1,
++
++      /*
++       * An exiting task may schedule after PF_EXITING is set. In such cases,
++       * bpf_task_from_pid() may not be able to find the task and if the BPF
++       * scheduler depends on pid lookup for dispatching, the task will be
++       * lost leading to various issues including RCU grace period stalls.
++       *
++       * To mask this problem, by default, unhashed tasks are automatically
++       * dispatched to the local DSQ on enqueue. If the BPF scheduler doesn't
++       * depend on pid lookups and wants to handle these tasks directly, the
++       * following flag can be used.
++       */
++      SCX_OPS_ENQ_EXITING             = 1LLU << 2,
++
++      /*
++       * If set, only tasks with policy set to SCHED_EXT are attached to
++       * sched_ext. If clear, SCHED_NORMAL tasks are also included.
++       */
++      SCX_OPS_SWITCH_PARTIAL          = 1LLU << 3,
++
++      /*
++       * A migration disabled task can only execute on its current CPU. By
++       * default, such tasks are automatically put on the CPU's local DSQ with
++       * the default slice on enqueue. If this ops flag is set, they also go
++       * through ops.enqueue().
++       *
++       * A migration disabled task never invokes ops.select_cpu() as it can
++       * only select the current CPU. Also, p->cpus_ptr will only contain its
++       * current CPU while p->nr_cpus_allowed keeps tracking p->user_cpus_ptr
++       * and thus may disagree with cpumask_weight(p->cpus_ptr).
++       */
++      SCX_OPS_ENQ_MIGRATION_DISABLED  = 1LLU << 4,
++
++      /*
++       * Queued wakeup (ttwu_queue) is a wakeup optimization that invokes
++       * ops.enqueue() on the ops.select_cpu() selected or the wakee's
++       * previous CPU via IPI (inter-processor interrupt) to reduce cacheline
++       * transfers. When this optimization is enabled, ops.select_cpu() is
++       * skipped in some cases (when racing against the wakee switching out).
++       * As the BPF scheduler may depend on ops.select_cpu() being invoked
++       * during wakeups, queued wakeup is disabled by default.
++       *
++       * If this ops flag is set, queued wakeup optimization is enabled and
++       * the BPF scheduler must be able to handle ops.enqueue() invoked on the
++       * wakee's CPU without preceding ops.select_cpu() even for tasks which
++       * may be executed on multiple CPUs.
++       */
++      SCX_OPS_ALLOW_QUEUED_WAKEUP     = 1LLU << 5,
++
++      /*
++       * If set, enable per-node idle cpumasks. If clear, use a single global
++       * flat idle cpumask.
++       */
++      SCX_OPS_BUILTIN_IDLE_PER_NODE   = 1LLU << 6,
++
++      /*
++       * CPU cgroup support flags
++       */
++      SCX_OPS_HAS_CGROUP_WEIGHT       = 1LLU << 16,   /* DEPRECATED, will be removed on 6.18 */
++
++      SCX_OPS_ALL_FLAGS               = SCX_OPS_KEEP_BUILTIN_IDLE |
++                                        SCX_OPS_ENQ_LAST |
++                                        SCX_OPS_ENQ_EXITING |
++                                        SCX_OPS_ENQ_MIGRATION_DISABLED |
++                                        SCX_OPS_ALLOW_QUEUED_WAKEUP |
++                                        SCX_OPS_SWITCH_PARTIAL |
++                                        SCX_OPS_BUILTIN_IDLE_PER_NODE |
++                                        SCX_OPS_HAS_CGROUP_WEIGHT,
++
++      /* high 8 bits are internal, don't include in SCX_OPS_ALL_FLAGS */
++      __SCX_OPS_INTERNAL_MASK         = 0xffLLU << 56,
++
++      SCX_OPS_HAS_CPU_PREEMPT         = 1LLU << 56,
++};
++
++/* argument container for ops.init_task() */
++struct scx_init_task_args {
++      /*
++       * Set if ops.init_task() is being invoked on the fork path, as opposed
++       * to the scheduler transition path.
++       */
++      bool                    fork;
++#ifdef CONFIG_EXT_GROUP_SCHED
++      /* the cgroup the task is joining */
++      struct cgroup           *cgroup;
++#endif
++};
++
++/* argument container for ops.exit_task() */
++struct scx_exit_task_args {
++      /* Whether the task exited before running on sched_ext. */
++      bool cancelled;
++};
++
++/* argument container for ops->cgroup_init() */
++struct scx_cgroup_init_args {
++      /* the weight of the cgroup [1..10000] */
++      u32                     weight;
++
++      /* bandwidth control parameters from cpu.max and cpu.max.burst */
++      u64                     bw_period_us;
++      u64                     bw_quota_us;
++      u64                     bw_burst_us;
++};
++
++enum scx_cpu_preempt_reason {
++      /* next task is being scheduled by &sched_class_rt */
++      SCX_CPU_PREEMPT_RT,
++      /* next task is being scheduled by &sched_class_dl */
++      SCX_CPU_PREEMPT_DL,
++      /* next task is being scheduled by &sched_class_stop */
++      SCX_CPU_PREEMPT_STOP,
++      /* unknown reason for SCX being preempted */
++      SCX_CPU_PREEMPT_UNKNOWN,
++};
++
++/*
++ * Argument container for ops->cpu_acquire(). Currently empty, but may be
++ * expanded in the future.
++ */
++struct scx_cpu_acquire_args {};
++
++/* argument container for ops->cpu_release() */
++struct scx_cpu_release_args {
++      /* the reason the CPU was preempted */
++      enum scx_cpu_preempt_reason reason;
++
++      /* the task that's going to be scheduled on the CPU */
++      struct task_struct      *task;
++};
++
++/*
++ * Informational context provided to dump operations.
++ */
++struct scx_dump_ctx {
++      enum scx_exit_kind      kind;
++      s64                     exit_code;
++      const char              *reason;
++      u64                     at_ns;
++      u64                     at_jiffies;
++};
++
++/**
++ * struct sched_ext_ops - Operation table for BPF scheduler implementation
++ *
++ * A BPF scheduler can implement an arbitrary scheduling policy by
++ * implementing and loading operations in this table. Note that a userland
++ * scheduling policy can also be implemented using the BPF scheduler
++ * as a shim layer.
++ */
++struct sched_ext_ops {
++      /**
++       * @select_cpu: Pick the target CPU for a task which is being woken up
++       * @p: task being woken up
++       * @prev_cpu: the cpu @p was on before sleeping
++       * @wake_flags: SCX_WAKE_*
++       *
++       * Decision made here isn't final. @p may be moved to any CPU while it
++       * is getting dispatched for execution later. However, as @p is not on
++       * the rq at this point, getting the eventual execution CPU right here
++       * saves a small bit of overhead down the line.
++       *
++       * If an idle CPU is returned, the CPU is kicked and will try to
++       * dispatch. While an explicit custom mechanism can be added,
++       * select_cpu() serves as the default way to wake up idle CPUs.
++       *
++       * @p may be inserted into a DSQ directly by calling
++       * scx_bpf_dsq_insert(). If so, the ops.enqueue() will be skipped.
++       * Directly inserting into %SCX_DSQ_LOCAL will put @p in the local DSQ
++       * of the CPU returned by this operation.
++       *
++       * Note that select_cpu() is never called for tasks that can only run
++       * on a single CPU or tasks with migration disabled, as they don't have
++       * the option to select a different CPU. See select_task_rq() for
++       * details.
++       */
++      s32 (*select_cpu)(struct task_struct *p, s32 prev_cpu, u64 wake_flags);
++
++      /**
++       * @enqueue: Enqueue a task on the BPF scheduler
++       * @p: task being enqueued
++       * @enq_flags: %SCX_ENQ_*
++       *
++       * @p is ready to run. Insert directly into a DSQ by calling
++       * scx_bpf_dsq_insert() or enqueue on the BPF scheduler. If not directly
++       * inserted, the bpf scheduler owns @p and if it fails to dispatch @p,
++       * the task will stall.
++       *
++       * If @p was inserted into a DSQ from ops.select_cpu(), this callback is
++       * skipped.
++       */
++      void (*enqueue)(struct task_struct *p, u64 enq_flags);
++
++      /**
++       * @dequeue: Remove a task from the BPF scheduler
++       * @p: task being dequeued
++       * @deq_flags: %SCX_DEQ_*
++       *
++       * Remove @p from the BPF scheduler. This is usually called to isolate
++       * the task while updating its scheduling properties (e.g. priority).
++       *
++       * The ext core keeps track of whether the BPF side owns a given task or
++       * not and can gracefully ignore spurious dispatches from BPF side,
++       * which makes it safe to not implement this method. However, depending
++       * on the scheduling logic, this can lead to confusing behaviors - e.g.
++       * scheduling position not being updated across a priority change.
++       */
++      void (*dequeue)(struct task_struct *p, u64 deq_flags);
++
++      /**
++       * @dispatch: Dispatch tasks from the BPF scheduler and/or user DSQs
++       * @cpu: CPU to dispatch tasks for
++       * @prev: previous task being switched out
++       *
++       * Called when a CPU's local dsq is empty. The operation should dispatch
++       * one or more tasks from the BPF scheduler into the DSQs using
++       * scx_bpf_dsq_insert() and/or move from user DSQs into the local DSQ
++       * using scx_bpf_dsq_move_to_local().
++       *
++       * The maximum number of times scx_bpf_dsq_insert() can be called
++       * without an intervening scx_bpf_dsq_move_to_local() is specified by
++       * ops.dispatch_max_batch. See the comments on top of the two functions
++       * for more details.
++       *
++       * When not %NULL, @prev is an SCX task with its slice depleted. If
++       * @prev is still runnable as indicated by set %SCX_TASK_QUEUED in
++       * @prev->scx.flags, it is not enqueued yet and will be enqueued after
++       * ops.dispatch() returns. To keep executing @prev, return without
++       * dispatching or moving any tasks. Also see %SCX_OPS_ENQ_LAST.
++       */
++      void (*dispatch)(s32 cpu, struct task_struct *prev);
++
++      /**
++       * @tick: Periodic tick
++       * @p: task running currently
++       *
++       * This operation is called every 1/HZ seconds on CPUs which are
++       * executing an SCX task. Setting @p->scx.slice to 0 will trigger an
++       * immediate dispatch cycle on the CPU.
++       */
++      void (*tick)(struct task_struct *p);
++
++      /**
++       * @runnable: A task is becoming runnable on its associated CPU
++       * @p: task becoming runnable
++       * @enq_flags: %SCX_ENQ_*
++       *
++       * This and the following three functions can be used to track a task's
++       * execution state transitions. A task becomes ->runnable() on a CPU,
++       * and then goes through one or more ->running() and ->stopping() pairs
++       * as it runs on the CPU, and eventually becomes ->quiescent() when it's
++       * done running on the CPU.
++       *
++       * @p is becoming runnable on the CPU because it's
++       *
++       * - waking up (%SCX_ENQ_WAKEUP)
++       * - being moved from another CPU
++       * - being restored after temporarily taken off the queue for an
++       *   attribute change.
++       *
++       * This and ->enqueue() are related but not coupled. This operation
++       * notifies @p's state transition and may not be followed by ->enqueue()
++       * e.g. when @p is being dispatched to a remote CPU, or when @p is
++       * being enqueued on a CPU experiencing a hotplug event. Likewise, a
++       * task may be ->enqueue()'d without being preceded by this operation
++       * e.g. after exhausting its slice.
++       */
++      void (*runnable)(struct task_struct *p, u64 enq_flags);
++
++      /**
++       * @running: A task is starting to run on its associated CPU
++       * @p: task starting to run
++       *
++       * Note that this callback may be called from a CPU other than the
++       * one the task is going to run on. This can happen when a task
++       * property is changed (i.e., affinity), since scx_next_task_scx(),
++       * which triggers this callback, may run on a CPU different from
++       * the task's assigned CPU.
++       *
++       * Therefore, always use scx_bpf_task_cpu(@p) to determine the
++       * target CPU the task is going to use.
++       *
++       * See ->runnable() for explanation on the task state notifiers.
++       */
++      void (*running)(struct task_struct *p);
++
++      /**
++       * @stopping: A task is stopping execution
++       * @p: task stopping to run
++       * @runnable: is task @p still runnable?
++       *
++       * Note that this callback may be called from a CPU other than the
++       * one the task was running on. This can happen when a task
++       * property is changed (i.e., affinity), since dequeue_task_scx(),
++       * which triggers this callback, may run on a CPU different from
++       * the task's assigned CPU.
++       *
++       * Therefore, always use scx_bpf_task_cpu(@p) to retrieve the CPU
++       * the task was running on.
++       *
++       * See ->runnable() for explanation on the task state notifiers. If
++       * !@runnable, ->quiescent() will be invoked after this operation
++       * returns.
++       */
++      void (*stopping)(struct task_struct *p, bool runnable);
++
++      /**
++       * @quiescent: A task is becoming not runnable on its associated CPU
++       * @p: task becoming not runnable
++       * @deq_flags: %SCX_DEQ_*
++       *
++       * See ->runnable() for explanation on the task state notifiers.
++       *
++       * @p is becoming quiescent on the CPU because it's
++       *
++       * - sleeping (%SCX_DEQ_SLEEP)
++       * - being moved to another CPU
++       * - being temporarily taken off the queue for an attribute change
++       *   (%SCX_DEQ_SAVE)
++       *
++       * This and ->dequeue() are related but not coupled. This operation
++       * notifies @p's state transition and may not be preceded by ->dequeue()
++       * e.g. when @p is being dispatched to a remote CPU.
++       */
++      void (*quiescent)(struct task_struct *p, u64 deq_flags);
++
++      /**
++       * @yield: Yield CPU
++       * @from: yielding task
++       * @to: optional yield target task
++       *
++       * If @to is NULL, @from is yielding the CPU to other runnable tasks.
++       * The BPF scheduler should ensure that other available tasks are
++       * dispatched before the yielding task. Return value is ignored in this
++       * case.
++       *
++       * If @to is not-NULL, @from wants to yield the CPU to @to. If the bpf
++       * scheduler can implement the request, return %true; otherwise, %false.
++       */
++      bool (*yield)(struct task_struct *from, struct task_struct *to);
++
++      /**
++       * @core_sched_before: Task ordering for core-sched
++       * @a: task A
++       * @b: task B
++       *
++       * Used by core-sched to determine the ordering between two tasks. See
++       * Documentation/admin-guide/hw-vuln/core-scheduling.rst for details on
++       * core-sched.
++       *
++       * Both @a and @b are runnable and may or may not currently be queued on
++       * the BPF scheduler. Should return %true if @a should run before @b.
++       * %false if there's no required ordering or @b should run before @a.
++       *
++       * If not specified, the default is ordering them according to when they
++       * became runnable.
++       */
++      bool (*core_sched_before)(struct task_struct *a, struct task_struct *b);
++
++      /**
++       * @set_weight: Set task weight
++       * @p: task to set weight for
++       * @weight: new weight [1..10000]
++       *
++       * Update @p's weight to @weight.
++       */
++      void (*set_weight)(struct task_struct *p, u32 weight);
++
++      /**
++       * @set_cpumask: Set CPU affinity
++       * @p: task to set CPU affinity for
++       * @cpumask: cpumask of cpus that @p can run on
++       *
++       * Update @p's CPU affinity to @cpumask.
++       */
++      void (*set_cpumask)(struct task_struct *p,
++                          const struct cpumask *cpumask);
++
++      /**
++       * @update_idle: Update the idle state of a CPU
++       * @cpu: CPU to update the idle state for
++       * @idle: whether entering or exiting the idle state
++       *
++       * This operation is called when @rq's CPU goes or leaves the idle
++       * state. By default, implementing this operation disables the built-in
++       * idle CPU tracking and the following helpers become unavailable:
++       *
++       * - scx_bpf_select_cpu_dfl()
++       * - scx_bpf_select_cpu_and()
++       * - scx_bpf_test_and_clear_cpu_idle()
++       * - scx_bpf_pick_idle_cpu()
++       *
++       * The user also must implement ops.select_cpu() as the default
++       * implementation relies on scx_bpf_select_cpu_dfl().
++       *
++       * Specify the %SCX_OPS_KEEP_BUILTIN_IDLE flag to keep the built-in idle
++       * tracking.
++       */
++      void (*update_idle)(s32 cpu, bool idle);
++
++      /**
++       * @cpu_acquire: A CPU is becoming available to the BPF scheduler
++       * @cpu: The CPU being acquired by the BPF scheduler.
++       * @args: Acquire arguments, see the struct definition.
++       *
++       * A CPU that was previously released from the BPF scheduler is now once
++       * again under its control.
++       */
++      void (*cpu_acquire)(s32 cpu, struct scx_cpu_acquire_args *args);
++
++      /**
++       * @cpu_release: A CPU is taken away from the BPF scheduler
++       * @cpu: The CPU being released by the BPF scheduler.
++       * @args: Release arguments, see the struct definition.
++       *
++       * The specified CPU is no longer under the control of the BPF
++       * scheduler. This could be because it was preempted by a higher
++       * priority sched_class, though there may be other reasons as well. The
++       * caller should consult @args->reason to determine the cause.
++       */
++      void (*cpu_release)(s32 cpu, struct scx_cpu_release_args *args);
++
++      /**
++       * @init_task: Initialize a task to run in a BPF scheduler
++       * @p: task to initialize for BPF scheduling
++       * @args: init arguments, see the struct definition
++       *
++       * Either we're loading a BPF scheduler or a new task is being forked.
++       * Initialize @p for BPF scheduling. This operation may block and can
++       * be used for allocations, and is called exactly once for a task.
++       *
++       * Return 0 for success, -errno for failure. An error return while
++       * loading will abort loading of the BPF scheduler. During a fork, it
++       * will abort that specific fork.
++       */
++      s32 (*init_task)(struct task_struct *p, struct scx_init_task_args *args);
++
++      /**
++       * @exit_task: Exit a previously-running task from the system
++       * @p: task to exit
++       * @args: exit arguments, see the struct definition
++       *
++       * @p is exiting or the BPF scheduler is being unloaded. Perform any
++       * necessary cleanup for @p.
++       */
++      void (*exit_task)(struct task_struct *p, struct scx_exit_task_args *args);
++
++      /**
++       * @enable: Enable BPF scheduling for a task
++       * @p: task to enable BPF scheduling for
++       *
++       * Enable @p for BPF scheduling. enable() is called on @p any time it
++       * enters SCX, and is always paired with a matching disable().
++       */
++      void (*enable)(struct task_struct *p);
++
++      /**
++       * @disable: Disable BPF scheduling for a task
++       * @p: task to disable BPF scheduling for
++       *
++       * @p is exiting, leaving SCX or the BPF scheduler is being unloaded.
++       * Disable BPF scheduling for @p. A disable() call is always matched
++       * with a prior enable() call.
++       */
++      void (*disable)(struct task_struct *p);
++
++      /**
++       * @dump: Dump BPF scheduler state on error
++       * @ctx: debug dump context
++       *
++       * Use scx_bpf_dump() to generate BPF scheduler specific debug dump.
++       */
++      void (*dump)(struct scx_dump_ctx *ctx);
++
++      /**
++       * @dump_cpu: Dump BPF scheduler state for a CPU on error
++       * @ctx: debug dump context
++       * @cpu: CPU to generate debug dump for
++       * @idle: @cpu is currently idle without any runnable tasks
++       *
++       * Use scx_bpf_dump() to generate BPF scheduler specific debug dump for
++       * @cpu. If @idle is %true and this operation doesn't produce any
++       * output, @cpu is skipped for dump.
++       */
++      void (*dump_cpu)(struct scx_dump_ctx *ctx, s32 cpu, bool idle);
++
++      /**
++       * @dump_task: Dump BPF scheduler state for a runnable task on error
++       * @ctx: debug dump context
++       * @p: runnable task to generate debug dump for
++       *
++       * Use scx_bpf_dump() to generate BPF scheduler specific debug dump for
++       * @p.
++       */
++      void (*dump_task)(struct scx_dump_ctx *ctx, struct task_struct *p);
++
++#ifdef CONFIG_EXT_GROUP_SCHED
++      /**
++       * @cgroup_init: Initialize a cgroup
++       * @cgrp: cgroup being initialized
++       * @args: init arguments, see the struct definition
++       *
++       * Either the BPF scheduler is being loaded or @cgrp created, initialize
++       * @cgrp for sched_ext. This operation may block.
++       *
++       * Return 0 for success, -errno for failure. An error return while
++       * loading will abort loading of the BPF scheduler. During cgroup
++       * creation, it will abort the specific cgroup creation.
++       */
++      s32 (*cgroup_init)(struct cgroup *cgrp,
++                         struct scx_cgroup_init_args *args);
++
++      /**
++       * @cgroup_exit: Exit a cgroup
++       * @cgrp: cgroup being exited
++       *
++       * Either the BPF scheduler is being unloaded or @cgrp destroyed, exit
++       * @cgrp for sched_ext. This operation may block.
++       */
++      void (*cgroup_exit)(struct cgroup *cgrp);
++
++      /**
++       * @cgroup_prep_move: Prepare a task to be moved to a different cgroup
++       * @p: task being moved
++       * @from: cgroup @p is being moved from
++       * @to: cgroup @p is being moved to
++       *
++       * Prepare @p for move from cgroup @from to @to. This operation may
++       * block and can be used for allocations.
++       *
++       * Return 0 for success, -errno for failure. An error return aborts the
++       * migration.
++       */
++      s32 (*cgroup_prep_move)(struct task_struct *p,
++                              struct cgroup *from, struct cgroup *to);
++
++      /**
++       * @cgroup_move: Commit cgroup move
++       * @p: task being moved
++       * @from: cgroup @p is being moved from
++       * @to: cgroup @p is being moved to
++       *
++       * Commit the move. @p is dequeued during this operation.
++       */
++      void (*cgroup_move)(struct task_struct *p,
++                          struct cgroup *from, struct cgroup *to);
++
++      /**
++       * @cgroup_cancel_move: Cancel cgroup move
++       * @p: task whose cgroup move is being canceled
++       * @from: cgroup @p was being moved from
++       * @to: cgroup @p was being moved to
++       *
++       * @p was cgroup_prep_move()'d but failed before reaching cgroup_move().
++       * Undo the preparation.
++       */
++      void (*cgroup_cancel_move)(struct task_struct *p,
++                                 struct cgroup *from, struct cgroup *to);
++
++      /**
++       * @cgroup_set_weight: A cgroup's weight is being changed
++       * @cgrp: cgroup whose weight is being updated
++       * @weight: new weight [1..10000]
++       *
++       * Update @cgrp's weight to @weight.
++       */
++      void (*cgroup_set_weight)(struct cgroup *cgrp, u32 weight);
++
++      /**
++       * @cgroup_set_bandwidth: A cgroup's bandwidth is being changed
++       * @cgrp: cgroup whose bandwidth is being updated
++       * @period_us: bandwidth control period
++       * @quota_us: bandwidth control quota
++       * @burst_us: bandwidth control burst
++       *
++       * Update @cgrp's bandwidth control parameters. This is from the cpu.max
++       * cgroup interface.
++       *
++       * @quota_us / @period_us determines the CPU bandwidth @cgrp is entitled
++       * to. For example, if @period_us is 1_000_000 and @quota_us is
++       * 2_500_000, @cgrp is entitled to 2.5 CPUs. @burst_us can be
++       * interpreted in the same fashion and specifies how much @cgrp can
++       * burst temporarily. The specific control mechanism and thus the
++       * interpretation of @period_us and burstiness is up to the BPF
++       * scheduler.
++       */
++      void (*cgroup_set_bandwidth)(struct cgroup *cgrp,
++                                   u64 period_us, u64 quota_us, u64 burst_us);
++
++#endif        /* CONFIG_EXT_GROUP_SCHED */
++
++      /*
++       * All online ops must come before ops.cpu_online().
++       */
++
++      /**
++       * @cpu_online: A CPU became online
++       * @cpu: CPU which just came up
++       *
++       * @cpu just came online. @cpu will not call ops.enqueue() or
++       * ops.dispatch(), nor run tasks associated with other CPUs beforehand.
++       */
++      void (*cpu_online)(s32 cpu);
++
++      /**
++       * @cpu_offline: A CPU is going offline
++       * @cpu: CPU which is going offline
++       *
++       * @cpu is going offline. @cpu will not call ops.enqueue() or
++       * ops.dispatch(), nor run tasks associated with other CPUs afterwards.
++       */
++      void (*cpu_offline)(s32 cpu);
++
++      /*
++       * All CPU hotplug ops must come before ops.init().
++       */
++
++      /**
++       * @init: Initialize the BPF scheduler
++       */
++      s32 (*init)(void);
++
++      /**
++       * @exit: Clean up after the BPF scheduler
++       * @info: Exit info
++       *
++       * ops.exit() is also called on ops.init() failure, which is a bit
++       * unusual. This is to allow rich reporting through @info on how
++       * ops.init() failed.
++       */
++      void (*exit)(struct scx_exit_info *info);
++
++      /**
++       * @dispatch_max_batch: Max nr of tasks that dispatch() can dispatch
++       */
++      u32 dispatch_max_batch;
++
++      /**
++       * @flags: %SCX_OPS_* flags
++       */
++      u64 flags;
++
++      /**
++       * @timeout_ms: The maximum amount of time, in milliseconds, that a
++       * runnable task should be able to wait before being scheduled. The
++       * maximum timeout may not exceed the default timeout of 30 seconds.
++       *
++       * Defaults to the maximum allowed timeout value of 30 seconds.
++       */
++      u32 timeout_ms;
++
++      /**
++       * @exit_dump_len: scx_exit_info.dump buffer length. If 0, the default
++       * value of 32768 is used.
++       */
++      u32 exit_dump_len;
++
++      /**
++       * @hotplug_seq: A sequence number that may be set by the scheduler to
++       * detect when a hotplug event has occurred during the loading process.
++       * If 0, no detection occurs. Otherwise, the scheduler will fail to
++       * load if the sequence number does not match @scx_hotplug_seq on the
++       * enable path.
++       */
++      u64 hotplug_seq;
++
++      /**
++       * @name: BPF scheduler's name
++       *
++       * Must be a non-zero valid BPF object name including only isalnum(),
++       * '_' and '.' chars. Shows up in kernel.sched_ext_ops sysctl while the
++       * BPF scheduler is enabled.
++       */
++      char name[SCX_OPS_NAME_LEN];
++
++      /* internal use only, must be NULL */
++      void *priv;
++};
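
For orientation, only .name is mandatory in the ops table above; a BPF scheduler fills in just the callbacks it needs. Below is a minimal sketch modeled on the scx_simple example in tools/sched_ext (an illustration, not part of this patch) that routes every task through the global DSQ:

    #include <scx/common.bpf.h>

    char _license[] SEC("license") = "GPL";

    /* Enqueue every runnable task on the shared global DSQ with the default slice. */
    void BPF_STRUCT_OPS(minimal_enqueue, struct task_struct *p, u64 enq_flags)
    {
            scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
    }

    SEC(".struct_ops.link")
    struct sched_ext_ops minimal_ops = {
            .enqueue        = (void *)minimal_enqueue,
            .name           = "minimal",
    };

Attaching the resulting struct_ops link is what drives the enable path that calls ops.init() and then ops.init_task() for every existing task, as documented above.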
++
++enum scx_opi {
++      SCX_OPI_BEGIN                   = 0,
++      SCX_OPI_NORMAL_BEGIN            = 0,
++      SCX_OPI_NORMAL_END              = SCX_OP_IDX(cpu_online),
++      SCX_OPI_CPU_HOTPLUG_BEGIN       = SCX_OP_IDX(cpu_online),
++      SCX_OPI_CPU_HOTPLUG_END         = SCX_OP_IDX(init),
++      SCX_OPI_END                     = SCX_OP_IDX(init),
++};
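
The range boundaries above are expressed with SCX_OP_IDX(), which is not visible in this excerpt. The assumed shape (defined elsewhere in the sched_ext code) is that a callback's index is simply its pointer-slot position inside struct sched_ext_ops, so the NORMAL and CPU_HOTPLUG ranges follow directly from member order:

    /* Assumed definition, for illustration only; not part of this hunk. */
    #define SCX_OP_IDX(op)  (offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))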
++
++/*
++ * Collection of event counters. Event types are placed in descending order.
++ */
++struct scx_event_stats {
++      /*
++       * If ops.select_cpu() returns a CPU which can't be used by the task,
++       * the core scheduler code silently picks a fallback CPU.
++       */
++      s64             SCX_EV_SELECT_CPU_FALLBACK;
++
++      /*
++       * When dispatching to a local DSQ, the CPU may have gone offline in
++       * the meantime. In this case, the task is bounced to the global DSQ.
++       */
++      s64             SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE;
++
++      /*
++       * If SCX_OPS_ENQ_LAST is not set, the number of times that a task
++       * continued to run because there were no other tasks on the CPU.
++       */
++      s64             SCX_EV_DISPATCH_KEEP_LAST;
++
++      /*
++       * If SCX_OPS_ENQ_EXITING is not set, the number of times that a task
++       * is dispatched to a local DSQ when exiting.
++       */
++      s64             SCX_EV_ENQ_SKIP_EXITING;
++
++      /*
++       * If SCX_OPS_ENQ_MIGRATION_DISABLED is not set, the number of times a
++       * migration disabled task skips ops.enqueue() and is dispatched to its
++       * local DSQ.
++       */
++      s64             SCX_EV_ENQ_SKIP_MIGRATION_DISABLED;
++
++      /*
++       * Total number of times a task's time slice was refilled with the
++       * default value (SCX_SLICE_DFL).
++       */
++      s64             SCX_EV_REFILL_SLICE_DFL;
++
++      /*
++       * The total duration of bypass modes in nanoseconds.
++       */
++      s64             SCX_EV_BYPASS_DURATION;
++
++      /*
++       * The number of tasks dispatched in the bypassing mode.
++       */
++      s64             SCX_EV_BYPASS_DISPATCH;
++
++      /*
++       * The number of times the bypassing mode has been activated.
++       */
++      s64             SCX_EV_BYPASS_ACTIVATE;
++};
++
++struct scx_sched_pcpu {
++      /*
++       * The event counters are in a per-CPU variable to minimize the
++       * accounting overhead. A system-wide view on the event counter is
++       * constructed when requested by scx_bpf_events().
++       */
++      struct scx_event_stats  event_stats;
++};
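
Because each counter lives in per-CPU storage, a system-wide view has to be folded together on demand, in the spirit of scx_bpf_events(). A minimal sketch of that aggregation, assuming the two structures above (the helper name is invented for illustration):

    #include <linux/cpumask.h>
    #include <linux/percpu.h>
    #include <linux/string.h>

    /* Illustrative helper: fold the per-CPU counters into a single total. */
    static void scx_sum_events_sketch(struct scx_sched *sch, struct scx_event_stats *sum)
    {
            int cpu;

            memset(sum, 0, sizeof(*sum));
            for_each_possible_cpu(cpu) {
                    struct scx_event_stats *e =
                            &per_cpu_ptr(sch->pcpu, cpu)->event_stats;

                    sum->SCX_EV_SELECT_CPU_FALLBACK += e->SCX_EV_SELECT_CPU_FALLBACK;
                    sum->SCX_EV_BYPASS_DISPATCH     += e->SCX_EV_BYPASS_DISPATCH;
                    sum->SCX_EV_BYPASS_ACTIVATE     += e->SCX_EV_BYPASS_ACTIVATE;
                    /* ...the remaining counters are accumulated the same way... */
            }
    }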
++
++struct scx_sched {
++      struct sched_ext_ops    ops;
++      DECLARE_BITMAP(has_op, SCX_OPI_END);
++
++      /*
++       * Dispatch queues.
++       *
++       * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability.
++       * This is to avoid live-locking in bypass mode where all tasks are
++       * dispatched to %SCX_DSQ_GLOBAL and all CPUs consume from it. If
++       * per-node split isn't sufficient, it can be further split.
++       */
++      struct rhashtable       dsq_hash;
++      struct scx_dispatch_q   **global_dsqs;
++      struct scx_sched_pcpu __percpu *pcpu;
++
++      bool                    warned_zero_slice;
++
++      atomic_t                exit_kind;
++      struct scx_exit_info    *exit_info;
++
++      struct kobject          kobj;
++
++      struct kthread_worker   *helper;
++      struct irq_work         error_irq_work;
++      struct kthread_work     disable_work;
++      struct rcu_work         rcu_work;
++};
++
++enum scx_wake_flags {
++      /* expose select WF_* flags as enums */
++      SCX_WAKE_FORK           = WF_FORK,
++      SCX_WAKE_TTWU           = WF_TTWU,
++      SCX_WAKE_SYNC           = WF_SYNC,
++};
++
++enum scx_enq_flags {
++      /* expose select ENQUEUE_* flags as enums */
++      SCX_ENQ_WAKEUP          = ENQUEUE_WAKEUP,
++      SCX_ENQ_HEAD            = ENQUEUE_HEAD,
++      SCX_ENQ_CPU_SELECTED    = ENQUEUE_RQ_SELECTED,
++
++      /* high 32bits are SCX specific */
++
++      /*
++       * Set the following to trigger preemption when calling
++       * scx_bpf_dsq_insert() with a local dsq as the target. The slice of the
++       * current task is cleared to zero and the CPU is kicked into the
++       * scheduling path. Implies %SCX_ENQ_HEAD.
++       */
++      SCX_ENQ_PREEMPT         = 1LLU << 32,
++
++      /*
++       * The task being enqueued was previously enqueued on the current CPU's
++       * %SCX_DSQ_LOCAL, but was removed from it in a call to the
++       * scx_bpf_reenqueue_local() kfunc. If scx_bpf_reenqueue_local() was
++       * invoked in a ->cpu_release() callback, and the task is again
++       * dispatched back to %SCX_DSQ_LOCAL by this current ->enqueue(), the
++       * task will not be scheduled on the CPU until at least the next invocation
++       * of the ->cpu_acquire() callback.
++       */
++      SCX_ENQ_REENQ           = 1LLU << 40,
++
++      /*
++       * The task being enqueued is the only task available for the cpu. By
++       * default, ext core keeps executing such tasks but when
++       * %SCX_OPS_ENQ_LAST is specified, they're ops.enqueue()'d with the
++       * %SCX_ENQ_LAST flag set.
++       *
++       * The BPF scheduler is responsible for triggering a follow-up
++       * scheduling event. Otherwise, execution may stall.
++       */
++      SCX_ENQ_LAST            = 1LLU << 41,
++
++      /* high 8 bits are internal */
++      __SCX_ENQ_INTERNAL_MASK = 0xffLLU << 56,
++
++      SCX_ENQ_CLEAR_OPSS      = 1LLU << 56,
++      SCX_ENQ_DSQ_PRIQ        = 1LLU << 57,
++};
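
As a usage sketch for the flags above (illustration only, not taken from this patch): a scheduler that wants a latency-critical task to preempt whatever is running on the local CPU can OR %SCX_ENQ_PREEMPT into the insertion flags, while everything else goes to the global DSQ. The is_urgent() policy helper is hypothetical:

    /* Placeholder policy: treat above-default weight as latency critical. */
    static bool is_urgent(const struct task_struct *p)
    {
            return p->scx.weight > 100;
    }

    void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
    {
            if (is_urgent(p))
                    scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL,
                                       enq_flags | SCX_ENQ_PREEMPT);
            else
                    scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
    }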
++
++enum scx_deq_flags {
++      /* expose select DEQUEUE_* flags as enums */
++      SCX_DEQ_SLEEP           = DEQUEUE_SLEEP,
++
++      /* high 32bits are SCX specific */
++
++      /*
++       * The generic core-sched layer decided to execute the task even though
++       * it hasn't been dispatched yet. Dequeue from the BPF side.
++       */
++      SCX_DEQ_CORE_SCHED_EXEC = 1LLU << 32,
++};
++
++enum scx_pick_idle_cpu_flags {
++      SCX_PICK_IDLE_CORE      = 1LLU << 0,    /* pick a CPU whose SMT siblings are also idle */
++      SCX_PICK_IDLE_IN_NODE   = 1LLU << 1,    /* pick a CPU in the same target NUMA node */
++};
++
++enum scx_kick_flags {
++      /*
++       * Kick the target CPU if idle. Guarantees that the target CPU goes
++       * through at least one full scheduling cycle before going idle. If the
++       * target CPU can be determined to be currently not idle and going to go
++       * through a scheduling cycle before going idle, noop.
++       */
++      SCX_KICK_IDLE           = 1LLU << 0,
++
++      /*
++       * Preempt the current task and execute the dispatch path. If the
++       * current task of the target CPU is an SCX task, its ->scx.slice is
++       * cleared to zero before the scheduling path is invoked so that the
++       * task expires and the dispatch path is invoked.
++       */
++      SCX_KICK_PREEMPT        = 1LLU << 1,
++
++      /*
++       * Wait for the CPU to be rescheduled. The scx_bpf_kick_cpu() call will
++       * return after the target CPU finishes picking the next task.
++       */
++      SCX_KICK_WAIT           = 1LLU << 2,
++};
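
A typical use of these flags from BPF (again a hedged sketch, not from this patch) is to nudge an idle CPU after queueing work on a shared DSQ; %SCX_KICK_IDLE keeps the kick cheap when the target is already busy. The helper name is invented for illustration:

    /* Illustrative helper: wake an idle CPU that can run @p, if there is one. */
    static void kick_an_idle_cpu(const struct task_struct *p)
    {
            s32 cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);

            if (cpu >= 0)
                    scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
    }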
++
++enum scx_tg_flags {
++      SCX_TG_ONLINE           = 1U << 0,
++      SCX_TG_INITED           = 1U << 1,
++};
++
++enum scx_enable_state {
++      SCX_ENABLING,
++      SCX_ENABLED,
++      SCX_DISABLING,
++      SCX_DISABLED,
++};
++
++static const char *scx_enable_state_str[] = {
++      [SCX_ENABLING]          = "enabling",
++      [SCX_ENABLED]           = "enabled",
++      [SCX_DISABLING]         = "disabling",
++      [SCX_DISABLED]          = "disabled",
++};
++
++/*
++ * sched_ext_entity->ops_state
++ *
++ * Used to track the task ownership between the SCX core and the BPF scheduler.
++ * State transitions look as follows:
++ *
++ * NONE -> QUEUEING -> QUEUED -> DISPATCHING
++ *   ^              |                 |
++ *   |              v                 v
++ *   \-------------------------------/
++ *
++ * QUEUEING and DISPATCHING states can be waited upon. See wait_ops_state() call
++ * sites for explanations on the conditions being waited upon and why they are
++ * safe. Transitions out of them into NONE or QUEUED must store_release and the
++ * waiters should load_acquire.
++ *
++ * Tracking scx_ops_state enables sched_ext core to reliably determine whether
++ * any given task can be dispatched by the BPF scheduler at all times and thus
++ * relaxes the requirements on the BPF scheduler. This allows the BPF scheduler
++ * to try to dispatch any task anytime regardless of its state as the SCX core
++ * can safely reject invalid dispatches.
++ */
++enum scx_ops_state {
++      SCX_OPSS_NONE,          /* owned by the SCX core */
++      SCX_OPSS_QUEUEING,      /* in transit to the BPF scheduler */
++      SCX_OPSS_QUEUED,        /* owned by the BPF scheduler */
++      SCX_OPSS_DISPATCHING,   /* in transit back to the SCX core */
++
++      /*
++       * QSEQ brands each QUEUED instance so that, when dispatch races
++       * dequeue/requeue, the dispatcher can tell whether it still has a claim
++       * on the task being dispatched.
++       *
++       * As some 32bit archs can't do 64bit store_release/load_acquire,
++       * p->scx.ops_state is atomic_long_t which leaves 30 bits for QSEQ on
++       * 32bit machines. The dispatch race window QSEQ protects is very narrow
++       * and runs with IRQ disabled. 30 bits should be sufficient.
++       */
++      SCX_OPSS_QSEQ_SHIFT     = 2,
++};
++
++/* Use macros to ensure that the type is unsigned long for the masks */
++#define SCX_OPSS_STATE_MASK   ((1LU << SCX_OPSS_QSEQ_SHIFT) - 1)
++#define SCX_OPSS_QSEQ_MASK    (~SCX_OPSS_STATE_MASK)
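
Given that layout, unpacking an ops_state word is a mask and a shift. A small sketch, assuming the value was read from p->scx.ops_state with atomic_long_read() as described above (helper names invented):

    /* Illustrative decomposition of a sched_ext_entity ops_state word. */
    static inline enum scx_ops_state opss_state(unsigned long opss)
    {
            return opss & SCX_OPSS_STATE_MASK;
    }

    static inline unsigned long opss_qseq(unsigned long opss)
    {
            return opss >> SCX_OPSS_QSEQ_SHIFT;
    }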
++
++DECLARE_PER_CPU(struct rq *, scx_locked_rq_state);
++
++/*
++ * Return the rq currently locked from an scx callback, or NULL if no rq is
++ * locked.
++ */
++static inline struct rq *scx_locked_rq(void)
++{
++      return __this_cpu_read(scx_locked_rq_state);
++}
++
++static inline bool scx_kf_allowed_if_unlocked(void)
++{
++      return !current->scx.kf_mask;
++}
++
++static inline bool scx_rq_bypassing(struct rq *rq)
++{
++      return unlikely(rq->scx.flags & SCX_RQ_BYPASSING);
++}
+diff --git a/kernel/seccomp.c b/kernel/seccomp.c
+index 3bbfba30a777a1..25f62867a16d93 100644
+--- a/kernel/seccomp.c
++++ b/kernel/seccomp.c
+@@ -741,6 +741,26 @@ seccomp_prepare_user_filter(const char __user *user_filter)
+ }
+ 
+ #ifdef SECCOMP_ARCH_NATIVE
++static bool seccomp_uprobe_exception(struct seccomp_data *sd)
++{
++#if defined __NR_uretprobe || defined __NR_uprobe
++#ifdef SECCOMP_ARCH_COMPAT
++      if (sd->arch == SECCOMP_ARCH_NATIVE)
++#endif
++      {
++#ifdef __NR_uretprobe
++              if (sd->nr == __NR_uretprobe)
++                      return true;
++#endif
++#ifdef __NR_uprobe
++              if (sd->nr == __NR_uprobe)
++                      return true;
++#endif
++      }
++#endif
++      return false;
++}
++
+ /**
+  * seccomp_is_const_allow - check if filter is constant allow with given data
+  * @fprog: The BPF programs
+@@ -758,13 +778,8 @@ static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
+               return false;
+ 
+       /* Our single exception to filtering. */
+-#ifdef __NR_uretprobe
+-#ifdef SECCOMP_ARCH_COMPAT
+-      if (sd->arch == SECCOMP_ARCH_NATIVE)
+-#endif
+-              if (sd->nr == __NR_uretprobe)
+-                      return true;
+-#endif
++      if (seccomp_uprobe_exception(sd))
++              return true;
+ 
+       for (pc = 0; pc < fprog->len; pc++) {
+               struct sock_filter *insn = &fprog->filter[pc];
+@@ -1042,6 +1057,9 @@ static const int mode1_syscalls[] = {
+       __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
+ #ifdef __NR_uretprobe
+       __NR_uretprobe,
++#endif
++#ifdef __NR_uprobe
++      __NR_uprobe,
+ #endif
+       -1, /* negative terminated */
+ };
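
For context on the filter side, the allow-lists that seccomp_is_const_allow() inspects are ordinary classic-BPF programs built from BPF_STMT()/BPF_JUMP(). The userspace sketch below is an illustration using only standard uapi definitions and is not part of the patch; note that it never mentions __NR_uprobe or __NR_uretprobe, which is the situation the exception above addresses so that syscalls issued by probe trampolines are not filtered away:

    #include <stddef.h>
    #include <sys/prctl.h>
    #include <sys/syscall.h>
    #include <linux/filter.h>
    #include <linux/seccomp.h>

    /* Allow read/write/exit_group, kill everything else. A production filter
     * would also verify seccomp_data.arch before trusting the syscall number. */
    static int install_allowlist(void)
    {
            struct sock_filter insns[] = {
                    BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                             offsetof(struct seccomp_data, nr)),
                    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_read, 2, 0),
                    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_write, 1, 0),
                    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_exit_group, 0, 1),
                    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
                    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
            };
            struct sock_fprog prog = {
                    .len = sizeof(insns) / sizeof(insns[0]),
                    .filter = insns,
            };

            /* PR_SET_SECCOMP requires no_new_privs (or CAP_SYS_ADMIN). */
            if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
                    return -1;
            return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
    }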
+diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
+index b6974fce800cd8..3a4d3b2e3f7409 100644
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -3070,7 +3070,7 @@ static int __init tk_aux_sysfs_init(void)
+               return -ENOMEM;
+       }
+ 
+-      for (int i = 0; i <= MAX_AUX_CLOCKS; i++) {
++      for (int i = 0; i < MAX_AUX_CLOCKS; i++) {
+               char id[2] = { [0] = '0' + i, };
+               struct kobject *clk = kobject_create_and_add(id, auxo);
+ 
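
The timekeeping hunk is a plain off-by-one fix: the sysfs ids are built from a single digit and, assuming MAX_AUX_CLOCKS is the number of auxiliary clocks rather than the last valid index, the loop must run MAX_AUX_CLOCKS times, not MAX_AUX_CLOCKS + 1. A trivial standalone illustration of the bound change:

    #include <stdio.h>

    #define MAX_AUX_CLOCKS 8        /* assumed value, for illustration only */

    int main(void)
    {
            int n_old = 0, n_new = 0;

            for (int i = 0; i <= MAX_AUX_CLOCKS; i++)       /* old, buggy bound */
                    n_old++;
            for (int i = 0; i < MAX_AUX_CLOCKS; i++)        /* corrected bound */
                    n_new++;
            printf("old=%d new=%d\n", n_old, n_new);        /* prints old=9 new=8 */
            return 0;
    }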
+diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
+index 69d877501cb727..cd50a94326e3a9 100644
+--- a/tools/sched_ext/scx_qmap.bpf.c
++++ b/tools/sched_ext/scx_qmap.bpf.c
+@@ -56,7 +56,8 @@ struct qmap {
+   queue1 SEC(".maps"),
+   queue2 SEC(".maps"),
+   queue3 SEC(".maps"),
+-  queue4 SEC(".maps");
++  queue4 SEC(".maps"),
++  dump_store SEC(".maps");
+ 
+ struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+@@ -578,11 +579,26 @@ void BPF_STRUCT_OPS(qmap_dump, struct scx_dump_ctx *dctx)
+                       return;
+ 
+               scx_bpf_dump("QMAP FIFO[%d]:", i);
++
++              /*
++               * Dump can be invoked anytime and there is no way to iterate in
++               * a non-destructive way. Pop and store in dump_store and then
++               * restore afterwards. If racing against new enqueues, ordering
++               * can get mixed up.
++               */
+               bpf_repeat(4096) {
+                       if (bpf_map_pop_elem(fifo, &pid))
+                               break;
++                      bpf_map_push_elem(&dump_store, &pid, 0);
+                       scx_bpf_dump(" %d", pid);
+               }
++
++              bpf_repeat(4096) {
++                      if (bpf_map_pop_elem(&dump_store, &pid))
++                              break;
++                      bpf_map_push_elem(fifo, &pid, 0);
++              }
++
+               scx_bpf_dump("\n");
+       }
+ }
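
The dump_store map added at the top of this file reuses scx_qmap's existing queue template, which is why the pop/push round-trip above restores the FIFO contents after dumping. For reference, the assumed shape of that template (field values are assumptions, not part of the hunk):

    /* Assumed queue template; in scx_qmap.bpf.c dump_store is appended to the
     * existing queue0..queue4 instantiation list shown in the first hunk
     * rather than declared separately. */
    struct qmap {
            __uint(type, BPF_MAP_TYPE_QUEUE);
            __uint(max_entries, 4096);      /* matches the bpf_repeat(4096) bounds above */
            __type(value, u32);
    } dump_store SEC(".maps");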
