On Fri, Oct 24, 2025 at 12:09:57PM -0400, Pasha Tatashin wrote:
> From: "Mike Rapoport (Microsoft)" <[email protected]>
> 
> The KHO framework uses a notifier chain as the mechanism for clients to
> participate in the finalization process. While this works for a single,
> central state machine, it is too restrictive for kernel-internal
> components like pstore/reserve_mem or IMA. These components need a
> simpler, direct way to register their state for preservation (e.g.,
> during their initcall) without being part of a complex,
> shutdown-time notifier sequence. The notifier model forces all
> participants into a single finalization flow and makes direct
> preservation from an arbitrary context difficult.
> This patch refactors the client participation model by removing the
> notifier chain and introducing a direct API for managing FDT subtrees.
> 
> The core kho_finalize() and kho_abort() state machine remains, but
> clients now register their data with KHO beforehand.
> 
> Signed-off-by: Mike Rapoport (Microsoft) <[email protected]>
> Co-developed-by: Pasha Tatashin <[email protected]>
> Signed-off-by: Pasha Tatashin <[email protected]>
> ---
>  include/linux/kexec_handover.h   |  28 +-----
>  kernel/kexec_handover.c          | 166 +++++++++++++++++--------------
>  kernel/kexec_handover_debugfs.c  |  17 ++--
>  kernel/kexec_handover_internal.h |   5 +-
>  lib/test_kho.c                   |  33 +-----
>  mm/memblock.c                    |  62 +++---------
>  6 files changed, 126 insertions(+), 185 deletions(-)

> diff --git a/lib/test_kho.c b/lib/test_kho.c
> index 60cd899ea745..1c6c4ce83666 100644
> --- a/lib/test_kho.c
> +++ b/lib/test_kho.c
> @@ -120,6 +93,7 @@ static int kho_test_prepare_fdt(struct kho_test_state 
> *state)
>  
>       fdt = folio_address(state->fdt);
>  
> +     err |= kho_preserve_folio(state->fdt);

We should bail out here; there is no point in creating an FDT if it won't be preserved.

>       err |= fdt_create(fdt, fdt_size);
>       err |= fdt_finish_reservemap(fdt);
>  
> @@ -131,6 +105,7 @@ static int kho_test_prepare_fdt(struct kho_test_state 
> *state)
>  
>       err |= fdt_finish(fdt);
>  
> +     err = kho_add_subtree(KHO_TEST_FDT, folio_address(state->fdt));
>       if (err)
>               folio_put(state->fdt);
>  
> @@ -203,7 +178,7 @@ static int kho_test_save(void)
>       if (err)
>               goto err_free_folios;
>  
> -     err = register_kho_notifier(&kho_test_nb);
> +     err = kho_add_subtree(KHO_TEST_FDT, folio_address(state->fdt));

This is the second time we add the same subtree, isn't it?

>       if (err)
>               goto err_free_fdt;
>  
> @@ -326,7 +301,7 @@ static void kho_test_cleanup(void)
>  
>  static void __exit kho_test_exit(void)
>  {
> -     unregister_kho_notifier(&kho_test_nb);
> +     kho_remove_subtree(folio_address(kho_test_state.fdt));
>       kho_test_cleanup();
>  }
>  module_exit(kho_test_exit);
> diff --git a/mm/memblock.c b/mm/memblock.c
> index e23e16618e9b..e3bef9b35d63 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
>  static int __init prepare_kho_fdt(void)
>  {
>       int err = 0, i;
> +     struct page *fdt_page;
>       void *fdt;
>  
> -     kho_fdt = alloc_page(GFP_KERNEL);
> -     if (!kho_fdt)
> +     fdt_page = alloc_page(GFP_KERNEL);
> +     if (!fdt_page)
>               return -ENOMEM;
>  
> -     fdt = page_to_virt(kho_fdt);
> +     fdt = page_to_virt(fdt_page);
>  
>       err |= fdt_create(fdt, PAGE_SIZE);
>       err |= fdt_finish_reservemap(fdt);
> @@ -2499,7 +2464,10 @@ static int __init prepare_kho_fdt(void)
>       err |= fdt_property_string(fdt, "compatible", 
> MEMBLOCK_KHO_NODE_COMPATIBLE);
>       for (i = 0; i < reserved_mem_count; i++) {
>               struct reserve_mem_table *map = &reserved_mem_table[i];
> +             struct page *page = phys_to_page(map->start);
> +             unsigned int nr_pages = map->size >> PAGE_SHIFT;
>  
> +             err |= kho_preserve_pages(page, nr_pages);
>               err |= fdt_begin_node(fdt, map->name);
>               err |= fdt_property_string(fdt, "compatible", 
> RESERVE_MEM_KHO_NODE_COMPATIBLE);
>               err |= fdt_property(fdt, "start", &map->start, 
> sizeof(map->start));
> @@ -2507,13 +2475,16 @@ static int __init prepare_kho_fdt(void)
>               err |= fdt_end_node(fdt);
>       }
>       err |= fdt_end_node(fdt);
> -
>       err |= fdt_finish(fdt);
>  
> +     err |= kho_preserve_folio(page_folio(fdt_page));

When looking at the end result after patch 8 it becomes a total mess.
Let's move this right after the allocation and make it

        err = kho_preserve_folio(page_folio(fdt_page));
        if (err)
                goto err_free_fdt;

> +
> +     if (!err)
> +             err = kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);

and replace this pattern with the usual kernel error-handling idiom:

        if (err)
                goto err_free_fdt;

        err = kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
        if (err)
                goto err_free_fdt;

so that only fdt operations are part of the

        err |= fdt_<function>

sequence.

>       if (err) {
>               pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
> -             put_page(kho_fdt);
> -             kho_fdt = NULL;
> +             put_page(fdt_page);
>       }
>  
>       return err;
> @@ -2529,13 +2500,6 @@ static int __init reserve_mem_init(void)
>       err = prepare_kho_fdt();
>       if (err)
>               return err;
> -
> -     err = register_kho_notifier(&reserve_mem_kho_nb);
> -     if (err) {
> -             put_page(kho_fdt);
> -             kho_fdt = NULL;
> -     }
> -
>       return err;
>  }
>  late_initcall(reserve_mem_init);
> -- 
> 2.51.1.821.gb6fe4d2222-goog
> 
> 

-- 
Sincerely yours,
Mike.

Reply via email to