On Mon, May 04, 2026 at 08:57:33PM +0200, Casey Connolly wrote:
> The memory carveout logic was fairly limited and had a few issues,
> rework it and teach it not to unmap regions that have a compatible
> property (since they may be used in U-Boot) or that don't have the
> no-map property.
> 
> The carveout process adds ~100ms to the boot time depending on the
> platform.
> 
> This prepares us for using SMEM as a source of truth and improving
> support for U-Boot as a first-stage bootloader, since SMEM's memory map
> doesn't already carve out some regions like ABL does.
> 
> Signed-off-by: Casey Connolly <[email protected]>
> ---
>  arch/arm/mach-snapdragon/board.c | 86 
> +++++++++++++++++++++++++---------------
>  1 file changed, 53 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/arm/mach-snapdragon/board.c 
> b/arch/arm/mach-snapdragon/board.c
> index 829a0109ac78..e12d3d00caa4 100644
> --- a/arch/arm/mach-snapdragon/board.c
> +++ b/arch/arm/mach-snapdragon/board.c
> @@ -622,27 +622,36 @@ u64 get_page_table_size(void)
>  {
>       return SZ_1M;
>  }
>  
> +struct mem_resource_attrs {
> +     fdt_addr_t start;
> +     fdt_addr_t size;
> +     u64 attrs;
> +};

Let's move the struct declaration towards the top.

> +
>  static int fdt_cmp_res(const void *v1, const void *v2)

This API should now be renamed to mem_cmp_resources(), no?

>  {
> -     const struct fdt_resource *res1 = v1, *res2 = v2;
> +     const struct mem_resource_attrs *res1 = v1, *res2 = v2;
>  
>       return res1->start - res2->start;
>  }
>  
> -#define N_RESERVED_REGIONS 32
> +#define N_RESERVED_REGIONS 64
>  
> -/* Mark all no-map regions as PTE_TYPE_FAULT to prevent speculative access.
> +/* Map and unmap reserved memory regions as appropriate.
> + * Mark all no-map regions as PTE_TYPE_FAULT to prevent speculative access.
>   * On some platforms this is enough to trigger a security violation and trap
>   * to EL3.
> + * Regions that may be accessed by drivers get mapped explicitly.
>   */
> -static void carve_out_reserved_memory(void)
> +static void configure_reserved_memory(void)
>  {
> -     static struct fdt_resource res[N_RESERVED_REGIONS] = { 0 };
> +     static struct mem_resource_attrs res[N_RESERVED_REGIONS] = { 0 };
>       int parent, rmem, count, i = 0;
>       phys_addr_t start;
>       size_t size;
> +     u64 attrs;
>  
>       /* Some reserved nodes must be carved out, as the cache-prefetcher may 
> otherwise
>        * attempt to access them, causing a security exception.
>        */
> @@ -651,14 +660,19 @@ static void carve_out_reserved_memory(void)
>               log_err("No reserved memory regions found\n");
>               return;
>       }
>  
> -     /* Collect the reserved memory regions */
> +     /* Collect the reserved memory regions and appropriate attrs */
>       fdt_for_each_subnode(rmem, gd->fdt_blob, parent) {
>               const fdt32_t *ptr;
> -             int len;
> +             attrs = PTE_TYPE_FAULT;
> +             /* If the no-map property isn't set then the region is valid */
>               if (!fdt_getprop(gd->fdt_blob, rmem, "no-map", NULL))
> -                     continue;
> +                     attrs = PTE_TYPE_VALID | PTE_BLOCK_MEMTYPE(MT_NORMAL);
> +             /* If the compatible property is set then this region may be 
> accessed by drivers and should
> +              * be marked valid too. */
> +             if (fdt_getprop(gd->fdt_blob, rmem, "compatible", NULL))
> +                     attrs = PTE_TYPE_VALID | PTE_BLOCK_MEMTYPE(MT_NORMAL);
>  
>               if (i == N_RESERVED_REGIONS) {
>                       log_err("Too many reserved regions!\n");
>                       break;
> @@ -667,50 +681,55 @@ static void carve_out_reserved_memory(void)
>               /* Read the address and size out from the reg property. Doing 
> this "properly" with
>                * fdt_get_resource() takes ~70ms on SDM845, but open-coding 
> the happy path here
>                * takes <1ms... Oh the woes of no dcache.
>                */
> -             ptr = fdt_getprop(gd->fdt_blob, rmem, "reg", &len);
> +             ptr = fdt_getprop(gd->fdt_blob, rmem, "reg", NULL);
>               if (ptr) {
>                       /* Qualcomm devices use #address/size-cells = <2> but 
> all reserved regions are within
>                        * the 32-bit address space. So we can cheat here for 
> speed.
>                        */
>                       res[i].start = fdt32_to_cpu(ptr[1]);
> -                     res[i].end = res[i].start + fdt32_to_cpu(ptr[3]);
> +                     res[i].size = fdt32_to_cpu(ptr[3]);
> +                     res[i].attrs = attrs;
>                       i++;
>               }
>       }
>  
>       /* Sort the reserved memory regions by address */
>       count = i;
> -     qsort(res, count, sizeof(struct fdt_resource), fdt_cmp_res);
> +     qsort(res, count, sizeof(res[0]), fdt_cmp_res);
> +     debug("Mapping %d regions!\n", count);
>  
>       /* Now set the right attributes for them. Often a lot of the regions 
> are tightly packed together
> -      * so we can optimise the number of calls to mmu_change_region_attr() 
> by combining adjacent
> +      * so we can optimise the number of calls to 
> mmu_change_region_attr_nobreak() by combining adjacent
>        * regions.
>        */
> -     start = ALIGN_DOWN(res[0].start, SZ_2M);
> -     size = ALIGN(res[0].end - start, SZ_2M);
> +     start = res[0].start;
> +     size = res[0].size;
> +     attrs = res[0].attrs;
> +     /* For each region after the first one, either increase the `size` to 
> eventually be mapped or
> +      * map the region we have and start a new one, this allows us to reduce 
> the number of calls to
> +      * mmu_map_region(). The loop is therefore "lagging" behind by one 
> iteration. */
>       for (i = 1; i <= count; i++) {
> -             /* We ideally want to 2M align everything for more efficient 
> pagetables, but we must avoid
> -              * overwriting reserved memory regions which shouldn't be 
> mapped as FAULT (like those with
> -              * compatible properties).
> -              * If within 2M of the previous region, bump the size to 
> include this region. Otherwise
> -              * start a new region.
> -              */
> -             if (i == count || start + size < res[i].start - SZ_2M) {
> -                     debug("  0x%016llx - 0x%016llx: reserved\n",
> -                           start, start + size);
> -                     mmu_change_region_attr(start, size, PTE_TYPE_FAULT);
> -                     /* If this is the final region then quit here before we 
> index
> -                      * out of bounds...
> -                      */
> +             /* If i == count we are done, just map the last region. If the 
> last region is
> +              * too far away or the attrs don't match then map the 
> meta-region we have and
> +              * start a new one. */
> +             if (i == count || start + size < res[i].start - SZ_8K || attrs 
> != res[i].attrs) {

I suppose using SZ_8K here instead of SZ_2M is intentional? I guess it
works now thanks to the page table optimization?

-Sumit

> +                     debug("  0x%016llx - 0x%016llx: %s\n",
> +                             start, start + size, attrs == PTE_TYPE_FAULT ? 
> "FAULT" : "VALID");
> +                     /* No need to break-before-make since dcache is 
> disabled */
> +                     mmu_change_region_attr_nobreak(start, size, attrs);
> +                     /* We have now mapped all the regions */
>                       if (i == count)
>                               break;
> -                     start = ALIGN_DOWN(res[i].start, SZ_2M);
> -                     size = ALIGN(res[i].end - start, SZ_2M);
> +                     /* Start a new meta-region */
> +                     start = res[i].start;
> +                     size = res[i].size;
> +                     attrs = res[i].attrs;
>               } else {
> -                     /* Bump size if this region is immediately after the 
> previous one */
> -                     size = ALIGN(res[i].end - start, SZ_2M);
> +                     /* This region is next to (<8K) the previous one so 
> combine them.
> +                      * Accounting for any small (<8K) gap. */
> +                     size = (res[i].start - start) + res[i].size;
>               }
>       }
>  }
>  
> @@ -744,13 +763,14 @@ void enable_caches(void)
>       gd->arch.tlb_emerg = gd->arch.tlb_addr;
>       gd->arch.tlb_addr = tlb_addr;
>       gd->arch.tlb_size = tlb_size;
>  
> -     /* We do the carveouts only for QCS404, for now. */
> +     /* On some boards speculative access may trigger a NOC or XPU violation 
> so explicitly mark reserved
> +      * regions as inaccessible (PTE_TYPE_FAULT) */
>       if (fdt_node_check_compatible(gd->fdt_blob, 0, "qcom,qcs404") == 0) {
>               carveout_start = get_timer(0);
>               /* Takes ~20-50ms on SDM845 */
> -             carve_out_reserved_memory();
> +             configure_reserved_memory();
>               debug("carveout time: %lums\n", get_timer(carveout_start));
>       }
>       dcache_enable();
>  }
> 
> -- 
> 2.53.0
> 

Reply via email to