Hi Loic,

On 7/27/18 8:14 AM, Loic Pallardy wrote:
> Memory entry could be allocated in different ways (ioremap,
> dma_alloc_coherent, internal RAM allocator...).
> This patch introduces an alloc ops in rproc_mem_entry structure
> to associate dedicated allocation mechanism to each memory entry
> descriptor in order to do remote core agnostic from memory allocators.
> 
> The introduction of this ops allows to perform allocation of all registered
> carveout at the same time, just before calling rproc_start().
> It simplifies and makes uniform carveout management whatever origin.

This patch is causing a kernel crash with trace entries. Please see
further below for the cause.

> 
> Signed-off-by: Loic Pallardy <[email protected]>
> ---
>  drivers/remoteproc/remoteproc_core.c | 261 
> ++++++++++++++++++++++-------------
>  include/linux/remoteproc.h           |   7 +
>  2 files changed, 175 insertions(+), 93 deletions(-)
> 
> diff --git a/drivers/remoteproc/remoteproc_core.c 
> b/drivers/remoteproc/remoteproc_core.c
> index 77b39ba..2c51549 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -642,74 +642,31 @@ static int rproc_handle_devmem(struct rproc *rproc, 
> struct fw_rsc_devmem *rsc,
>  }
>  
>  /**
> - * rproc_release_carveout() - release acquired carveout
> + * rproc_alloc_carveout() - allocated specified carveout
>   * @rproc: rproc handle
> - * @mem: the memory entry to release
> - *
> - * This function releases specified memory entry @mem allocated via
> - * dma_alloc_coherent() function by @rproc.
> - */
> -static int rproc_release_carveout(struct rproc *rproc,
> -                               struct rproc_mem_entry *mem)
> -{
> -     struct device *dev = &rproc->dev;
> -
> -     /* clean up carveout allocations */
> -     dma_free_coherent(dev->parent, mem->len, mem->va, mem->dma);
> -     return 0;
> -}
> -
> -/**
> - * rproc_handle_carveout() - handle phys contig memory allocation requests
> - * @rproc: rproc handle
> - * @rsc: the resource entry
> - * @avail: size of available data (for image validation)
> - *
> - * This function will handle firmware requests for allocation of physically
> - * contiguous memory regions.
> - *
> - * These request entries should come first in the firmware's resource table,
> - * as other firmware entries might request placing other data objects inside
> - * these memory regions (e.g. data/code segments, trace resource entries, 
> ...).
> + * @mem: the memory entry to allocate
>   *
> - * Allocating memory this way helps utilizing the reserved physical memory
> - * (e.g. CMA) more efficiently, and also minimizes the number of TLB entries
> - * needed to map it (in case @rproc is using an IOMMU). Reducing the TLB
> - * pressure is important; it may have a substantial impact on performance.
> + * This function allocate specified memory entry @mem using
> + * dma_alloc_coherent() as default allocator
>   */
> -static int rproc_handle_carveout(struct rproc *rproc,
> -                              struct fw_rsc_carveout *rsc,
> -                              int offset, int avail)
> +static int rproc_alloc_carveout(struct rproc *rproc,
> +                             struct rproc_mem_entry *mem)
>  {
> -     struct rproc_mem_entry *carveout, *mapping = NULL;
> +     struct rproc_mem_entry *mapping = NULL;
>       struct device *dev = &rproc->dev;
>       dma_addr_t dma;
>       void *va;
>       int ret;
>  
> -     if (sizeof(*rsc) > avail) {
> -             dev_err(dev, "carveout rsc is truncated\n");
> -             return -EINVAL;
> -     }
> -
> -     /* make sure reserved bytes are zeroes */
> -     if (rsc->reserved) {
> -             dev_err(dev, "carveout rsc has non zero reserved bytes\n");
> -             return -EINVAL;
> -     }
> -
> -     dev_dbg(dev, "carveout rsc: name: %s, da 0x%x, pa 0x%x, len 0x%x, flags 
> 0x%x\n",
> -             rsc->name, rsc->da, rsc->pa, rsc->len, rsc->flags);
> -
> -     va = dma_alloc_coherent(dev->parent, rsc->len, &dma, GFP_KERNEL);
> +     va = dma_alloc_coherent(dev->parent, mem->len, &dma, GFP_KERNEL);
>       if (!va) {
>               dev_err(dev->parent,
> -                     "failed to allocate dma memory: len 0x%x\n", rsc->len);
> +                     "failed to allocate dma memory: len 0x%x\n", mem->len);
>               return -ENOMEM;
>       }
>  
>       dev_dbg(dev, "carveout va %pK, dma %pad, len 0x%x\n",
> -             va, &dma, rsc->len);
> +             va, &dma, mem->len);
>  
>       /*
>        * Ok, this is non-standard.
> @@ -729,22 +686,22 @@ static int rproc_handle_carveout(struct rproc *rproc,
>        * physical address in this case.
>        */
>  
> -     if (rsc->da != FW_RSC_ADDR_ANY && !rproc->domain) {
> -             dev_err(dev->parent,
> -                     "Bad carveout rsc configuration\n");
> -             ret = -ENOMEM;
> -             goto dma_free;
> -     }
> +     if (mem->da != FW_RSC_ADDR_ANY) {
> +             if (!rproc->domain) {
> +                     dev_err(dev->parent,
> +                             "Bad carveout rsc configuration\n");
> +                     ret = -ENOMEM;
> +                     goto dma_free;
> +             }

Same comment from Patch 1.

>  
> -     if (rsc->da != FW_RSC_ADDR_ANY && rproc->domain) {
>               mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
>               if (!mapping) {
>                       ret = -ENOMEM;
>                       goto dma_free;
>               }
>  
> -             ret = iommu_map(rproc->domain, rsc->da, dma, rsc->len,
> -                             rsc->flags);
> +             ret = iommu_map(rproc->domain, mem->da, dma, mem->len,
> +                             mem->flags);
>               if (ret) {
>                       dev_err(dev, "iommu_map failed: %d\n", ret);
>                       goto free_mapping;
> @@ -757,52 +714,102 @@ static int rproc_handle_carveout(struct rproc *rproc,
>                * We can't trust the remote processor not to change the
>                * resource table, so we must maintain this info independently.
>                */
> -             mapping->da = rsc->da;
> -             mapping->len = rsc->len;
> +             mapping->da = mem->da;
> +             mapping->len = mem->len;
>               list_add_tail(&mapping->node, &rproc->mappings);
>  
>               dev_dbg(dev, "carveout mapped 0x%x to %pad\n",
> -                     rsc->da, &dma);
> +                     mem->da, &dma);
> +     } else {
> +             mem->da = (u32)dma;

Hmm, what was the purpose of this? So, this appears to be handling the
missing implementation for the comment in the fw_rsc_carveout about
FW_RSC_ADDR_ANY.

>       }
>  
> -     /*
> -      * Some remote processors might need to know the pa
> -      * even though they are behind an IOMMU. E.g., OMAP4's
> -      * remote M3 processor needs this so it can control
> -      * on-chip hardware accelerators that are not behind
> -      * the IOMMU, and therefor must know the pa.
> -      *
> -      * Generally we don't want to expose physical addresses
> -      * if we don't have to (remote processors are generally
> -      * _not_ trusted), so we might want to do this only for
> -      * remote processor that _must_ have this (e.g. OMAP4's
> -      * dual M3 subsystem).
> -      *
> -      * Non-IOMMU processors might also want to have this info.
> -      * In this case, the device address and the physical address
> -      * are the same.
> -      */
> -     rsc->pa = (u32)rproc_va_to_pa(va);
> -
> -     carveout = rproc_mem_entry_init(dev, va, dma, rsc->len, rsc->da,
> -                                     rproc_release_carveout, rsc->name);
> -     if (!carveout)
> -             goto free_carv;
> -
> -     rproc_add_carveout(rproc, carveout);
> +     mem->dma = (u32)dma;

We don't need the typecast, mem->dma is already of type dma_addr_t. Same
comment above on the else part as well.

> +     mem->va = va;
>  
>       return 0;
>  
> -free_carv:
> -     kfree(carveout);
>  free_mapping:
>       kfree(mapping);
>  dma_free:
> -     dma_free_coherent(dev->parent, rsc->len, va, dma);
> +     dma_free_coherent(dev->parent, mem->len, va, dma);
>       return ret;
>  }
>  
>  /**
> + * rproc_release_carveout() - release acquired carveout
> + * @rproc: rproc handle
> + * @mem: the memory entry to release
> + *
> + * This function releases specified memory entry @mem allocated via
> + * rproc_alloc_carveout() function by @rproc.
> + */
> +static int rproc_release_carveout(struct rproc *rproc,
> +                               struct rproc_mem_entry *mem)
> +{
> +     struct device *dev = &rproc->dev;
> +
> +     /* clean up carveout allocations */
> +     dma_free_coherent(dev->parent, mem->len, mem->va, mem->dma);
> +     return 0;
> +}
> +
> +/**
> + * rproc_handle_carveout() - handle phys contig memory allocation requests
> + * @rproc: rproc handle
> + * @rsc: the resource entry
> + * @avail: size of available data (for image validation)
> + *
> + * This function will handle firmware requests for allocation of physically
> + * contiguous memory regions.
> + *
> + * These request entries should come first in the firmware's resource table,
> + * as other firmware entries might request placing other data objects inside
> + * these memory regions (e.g. data/code segments, trace resource entries, 
> ...).
> + *
> + * Allocating memory this way helps utilizing the reserved physical memory
> + * (e.g. CMA) more efficiently, and also minimizes the number of TLB entries
> + * needed to map it (in case @rproc is using an IOMMU). Reducing the TLB
> + * pressure is important; it may have a substantial impact on performance.
> + */
> +static int rproc_handle_carveout(struct rproc *rproc,
> +                              struct fw_rsc_carveout *rsc,
> +                              int offset, int avail)
> +{
> +     struct rproc_mem_entry *carveout;
> +     struct device *dev = &rproc->dev;
> +
> +     if (sizeof(*rsc) > avail) {
> +             dev_err(dev, "carveout rsc is truncated\n");
> +             return -EINVAL;
> +     }
> +
> +     /* make sure reserved bytes are zeroes */
> +     if (rsc->reserved) {
> +             dev_err(dev, "carveout rsc has non zero reserved bytes\n");
> +             return -EINVAL;
> +     }
> +
> +     dev_dbg(dev, "carveout rsc: name: %s, da 0x%x, pa 0x%x, len 0x%x, flags 
> 0x%x\n",
> +             rsc->name, rsc->da, rsc->pa, rsc->len, rsc->flags);
> +
> +     /* Register carveout in in list */
> +     carveout = rproc_mem_entry_init(dev, 0, 0, rsc->len, rsc->da,
> +                                     rproc_alloc_carveout,
> +                                     rproc_release_carveout, rsc->name);
> +     if (!carveout) {
> +             dev_err(dev, "Can't allocate memory entry structure\n");
> +             return -ENOMEM;
> +     }
> +
> +     carveout->flags = rsc->flags;
> +     carveout->rsc_offset = offset;
> +     rproc_add_carveout(rproc, carveout);

Once we get rid of rproc_add_carveout, the list addition will mostly be
handled in rproc_mem_entry_init itself.

> +
> +     return 0;
> +}
> +
> +/**
>   * rproc_add_carveout() - register an allocated carveout region
>   * @rproc: rproc handle
>   * @mem: memory entry to register
> @@ -832,6 +839,7 @@ void rproc_add_carveout(struct rproc *rproc, struct 
> rproc_mem_entry *mem)
>  struct rproc_mem_entry *
>  rproc_mem_entry_init(struct device *dev,
>                    void *va, dma_addr_t dma, int len, u32 da,
> +                  int (*alloc)(struct rproc *, struct rproc_mem_entry *),
>                    int (*release)(struct rproc *, struct rproc_mem_entry *),
>                    const char *name, ...)
>  {
> @@ -846,7 +854,9 @@ struct rproc_mem_entry *
>       mem->dma = dma;
>       mem->da = da;
>       mem->len = len;
> +     mem->alloc = alloc;
>       mem->release = release;
> +     mem->rsc_offset = FW_RSC_ADDR_ANY;
>  
>       va_start(args, name);
>       vsnprintf(mem->name, sizeof(mem->name), name, args);
> @@ -978,6 +988,63 @@ static void rproc_unprepare_subdevices(struct rproc 
> *rproc)
>  }
>  
>  /**
> + * rproc_alloc_registered_carveouts() - allocate all carveouts registered
> + * in the list
> + * @rproc: the remote processor handle
> + *
> + * This function parses registered carveout list, performs allocation
> + * if alloc() ops registered and updates resource table information
> + * if rsc_offset set.
> + *
> + * Return: 0 on success
> + */
> +static int rproc_alloc_registered_carveouts(struct rproc *rproc)
> +{
> +     struct rproc_mem_entry *entry, *tmp;
> +     struct fw_rsc_carveout *rsc;
> +     struct device *dev = &rproc->dev;
> +     int ret;
> +
> +     list_for_each_entry_safe(entry, tmp, &rproc->carveouts, node) {
> +             if (entry->alloc) {
> +                     ret = entry->alloc(rproc, entry);
> +                     if (ret) {
> +                             dev_err(dev, "Unable to allocate carveout %s: 
> %d\n",
> +                                     entry->name, ret);
> +                             return -ENOMEM;
> +                     }
> +             }
> +
> +             if (entry->rsc_offset != FW_RSC_ADDR_ANY) {
> +                     /* update resource table */
> +                     rsc = (void *)rproc->table_ptr + entry->rsc_offset;
> +
> +                     /*
> +                      * Some remote processors might need to know the pa
> +                      * even though they are behind an IOMMU. E.g., OMAP4's
> +                      * remote M3 processor needs this so it can control
> +                      * on-chip hardware accelerators that are not behind
> +                      * the IOMMU, and therefor must know the pa.
> +                      *
> +                      * Generally we don't want to expose physical addresses
> +                      * if we don't have to (remote processors are generally
> +                      * _not_ trusted), so we might want to do this only for
> +                      * remote processor that _must_ have this (e.g. OMAP4's
> +                      * dual M3 subsystem).
> +                      *
> +                      * Non-IOMMU processors might also want to have this 
> info.
> +                      * In this case, the device address and the physical 
> address
> +                      * are the same.
> +                      */
> +                     if (entry->va)
> +                             rsc->pa = (u32)rproc_va_to_pa(entry->va);
> +             }
> +     }
> +
> +     return 0;
> +}
> +
> +/**
>   * rproc_coredump_cleanup() - clean up dump_segments list
>   * @rproc: the remote processor handle
>   */
> @@ -1148,6 +1215,14 @@ static int rproc_fw_boot(struct rproc *rproc, const 
> struct firmware *fw)
>               goto clean_up_resources;
>       }
>  
> +     /* Allocate carveout resources associated to rproc */
> +     ret = rproc_alloc_registered_carveouts(rproc);
> +     if (ret) {
> +             dev_err(dev, "Failed to allocate associated carveouts: %d\n",
> +                     ret);
> +             goto clean_up_resources;
> +     }

This is causing an issue with RSC_TRACE on where the trace region on the
remote processor is actually backed by a DDR carveout address. The
allocations are now being done after processing the resources from the
rproc_loading_handlers, which causes the RSC_TRACE to be configured with
an incorrect kernel va, and accessing it through debugfs then results in
a kernel crash.

regards
Suman

> +
>       ret = rproc_start(rproc, fw);
>       if (ret)
>               goto clean_up_resources;
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index 55f30fc..ea95b04 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -317,6 +317,9 @@ struct fw_rsc_vdev {
>   * @priv: associated data
>   * @name: associated memory region name (optional)
>   * @node: list node
> + * @rsc_offset: offset in resource table
> + * @flags: iommu protection flags
> + * @alloc: specific memory allocator function
>   */
>  struct rproc_mem_entry {
>       void *va;
> @@ -326,6 +329,9 @@ struct rproc_mem_entry {
>       void *priv;
>       char name[32];
>       struct list_head node;
> +     u32 rsc_offset;
> +     u32 flags;
> +     int (*alloc)(struct rproc *rproc, struct rproc_mem_entry *mem);
>       int (*release)(struct rproc *rproc, struct rproc_mem_entry *mem);
>  };
>  
> @@ -563,6 +569,7 @@ struct rproc *rproc_alloc(struct device *dev, const char 
> *name,
>  struct rproc_mem_entry *
>  rproc_mem_entry_init(struct device *dev,
>                    void *va, dma_addr_t dma, int len, u32 da,
> +                  int (*alloc)(struct rproc *, struct rproc_mem_entry *),
>                    int (*release)(struct rproc *, struct rproc_mem_entry *),
>                    const char *name, ...);
>  
> 

Reply via email to