On Fri, Jan 02, 2026 at 08:59:33AM +0100, Petr Tesarik wrote:
> On Tue, 30 Dec 2025 05:16:00 -0500
> "Michael S. Tsirkin" <[email protected]> wrote:
> 
> > If a driver is buggy and has 2 overlapping mappings but only
> > sets the cache clean flag on the 1st one of them, we warn.
> > But if it only does it for the 2nd one, we don't.
> > 
> > Fix by tracking cache clean flag in the entry.
> > Shrink map_err_type to u8 to avoid bloating up the struct.
> > 
> > Signed-off-by: Michael S. Tsirkin <[email protected]>
> > ---
> >  kernel/dma/debug.c | 25 ++++++++++++++++++++-----
> >  1 file changed, 20 insertions(+), 5 deletions(-)
> > 
> > diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
> > index 7e66d863d573..9bd14fd4c51b 100644
> > --- a/kernel/dma/debug.c
> > +++ b/kernel/dma/debug.c
> > @@ -63,6 +63,7 @@ enum map_err_types {
> >   * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
> >   * @paddr: physical start address of the mapping
> >   * @map_err_type: track whether dma_mapping_error() was checked
> > + * @is_cache_clean: driver promises not to write to buffer while mapped
> >   * @stack_len: number of backtrace entries in @stack_entries
> >   * @stack_entries: stack of backtrace history
> >   */
> > @@ -76,7 +77,8 @@ struct dma_debug_entry {
> >     int              sg_call_ents;
> >     int              sg_mapped_ents;
> >     phys_addr_t      paddr;
> > -   enum map_err_types  map_err_type;
> > +   u8               map_err_type;
> 
> Where exactly is the bloat? With my configuration, the size of struct
> dma_debug_entry is 128 bytes, with enough padding bytes at the end to
> keep it at 128 even if I keep this member an enum...

Ah, I missed ____cacheline_aligned_in_smp.  Fixed.

> 
> Anyway, if there is a reason to keep this member small, I prefer to
> pack enum map_err_types instead:
> 
> @@ -46,9 +46,9 @@ enum {
>  enum map_err_types {
>       MAP_ERR_CHECK_NOT_APPLICABLE,
>       MAP_ERR_NOT_CHECKED,
>       MAP_ERR_CHECKED,
> -};
> +} __packed;

Wow I didn't realize __packed can apply to enums.

>  #define DMA_DEBUG_STACKTRACE_ENTRIES 5
>  
>  /**
> 
> This will shrink it to a single byte but it will also keep the type
> information.
> 
> > +   bool             is_cache_clean;
> >  #ifdef CONFIG_STACKTRACE
> >     unsigned int    stack_len;
> >     unsigned long   stack_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
> > @@ -472,12 +474,15 @@ static int active_cacheline_dec_overlap(phys_addr_t 
> > cln)
> >     return active_cacheline_set_overlap(cln, --overlap);
> >  }
> >  
> > -static int active_cacheline_insert(struct dma_debug_entry *entry)
> > +static int active_cacheline_insert(struct dma_debug_entry *entry,
> > +                              bool *overlap_cache_clean)
> >  {
> >     phys_addr_t cln = to_cacheline_number(entry);
> >     unsigned long flags;
> >     int rc;
> >  
> > +   *overlap_cache_clean = false;
> > +
> >     /* If the device is not writing memory then we don't have any
> >      * concerns about the cpu consuming stale data.  This mitigates
> >      * legitimate usages of overlapping mappings.
> > @@ -487,8 +492,14 @@ static int active_cacheline_insert(struct 
> > dma_debug_entry *entry)
> >  
> >     spin_lock_irqsave(&radix_lock, flags);
> >     rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
> > -   if (rc == -EEXIST)
> > +   if (rc == -EEXIST) {
> > +           struct dma_debug_entry *existing;
> > +
> >             active_cacheline_inc_overlap(cln);
> > +           existing = radix_tree_lookup(&dma_active_cacheline, cln);
> > +           if (existing)
> > +                   *overlap_cache_clean = existing->is_cache_clean;
> 
> *nitpick*
> 
> IIUC radix_tree_insert() returns -EEXIST only if the key is already
> present in the tree. Since radix_lock is not released between the
> insert attempt and this lookup, I don't see how this lookup could
> possibly fail. If it's not expected to fail, I would add a WARN_ON().
> 
> Please, do correct me if I'm missing something.
> 
> Other than that, LGTM.
> 
> Petr T

Sure, thanks!

> > +   }
> >     spin_unlock_irqrestore(&radix_lock, flags);
> >  
> >     return rc;
> > @@ -583,20 +594,24 @@ DEFINE_SHOW_ATTRIBUTE(dump);
> >   */
> >  static void add_dma_entry(struct dma_debug_entry *entry, unsigned long 
> > attrs)
> >  {
> > +   bool overlap_cache_clean;
> >     struct hash_bucket *bucket;
> >     unsigned long flags;
> >     int rc;
> >  
> > +   entry->is_cache_clean = !!(attrs & DMA_ATTR_CPU_CACHE_CLEAN);
> > +
> >     bucket = get_hash_bucket(entry, &flags);
> >     hash_bucket_add(bucket, entry);
> >     put_hash_bucket(bucket, flags);
> >  
> > -   rc = active_cacheline_insert(entry);
> > +   rc = active_cacheline_insert(entry, &overlap_cache_clean);
> >     if (rc == -ENOMEM) {
> >             pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n");
> >             global_disable = true;
> >     } else if (rc == -EEXIST &&
> > -              !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | 
> > DMA_ATTR_CPU_CACHE_CLEAN)) &&
> > +              !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
> > +              !(entry->is_cache_clean && overlap_cache_clean) &&
> >                !(IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) &&
> >                  is_swiotlb_active(entry->dev))) {
> >             err_printk(entry->dev, entry,


Reply via email to