On Fri, Jan 02, 2026 at 08:59:33AM +0100, Petr Tesarik wrote:
> On Tue, 30 Dec 2025 05:16:00 -0500
> "Michael S. Tsirkin" <[email protected]> wrote:
>
> > If a driver is buggy and has 2 overlapping mappings but only
> > sets the cache clean flag on the 1st one of them, we warn.
> > But if it only does it for the 2nd one, we don't.
> >
> > Fix by tracking cache clean flag in the entry.
> > Shrink map_err_type to u8 to avoid bloating up the struct.
> >
> > Signed-off-by: Michael S. Tsirkin <[email protected]>
> > ---
> > kernel/dma/debug.c | 25 ++++++++++++++++++++-----
> > 1 file changed, 20 insertions(+), 5 deletions(-)
> >
> > diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
> > index 7e66d863d573..9bd14fd4c51b 100644
> > --- a/kernel/dma/debug.c
> > +++ b/kernel/dma/debug.c
> > @@ -63,6 +63,7 @@ enum map_err_types {
> > * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
> > * @paddr: physical start address of the mapping
> > * @map_err_type: track whether dma_mapping_error() was checked
> > + * @is_cache_clean: driver promises not to write to buffer while mapped
> > * @stack_len: number of backtrace entries in @stack_entries
> > * @stack_entries: stack of backtrace history
> > */
> > @@ -76,7 +77,8 @@ struct dma_debug_entry {
> > int sg_call_ents;
> > int sg_mapped_ents;
> > phys_addr_t paddr;
> > - enum map_err_types map_err_type;
> > + u8 map_err_type;
>
> Where exactly is the bloat? With my configuration, the size of struct
> dma_debug_entry is 128 bytes, with enough padding bytes at the end to
> keep it at 128 even if I keep this member an enum...
Ah, I missed ____cacheline_aligned_in_smp. Fixed.
>
> Anyway, if there is a reason to keep this member small, I prefer to
> pack enum map_err_types instead:
>
> @@ -46,9 +46,9 @@ enum {
> enum map_err_types {
> MAP_ERR_CHECK_NOT_APPLICABLE,
> MAP_ERR_NOT_CHECKED,
> MAP_ERR_CHECKED,
> -};
> +} __packed;
Wow I didn't realize __packed can apply to enums.
> #define DMA_DEBUG_STACKTRACE_ENTRIES 5
>
> /**
>
> This will shrink it to a single byte but it will also keep the type
> information.
>
> > + bool is_cache_clean;
> > #ifdef CONFIG_STACKTRACE
> > unsigned int stack_len;
> > unsigned long stack_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
> > @@ -472,12 +474,15 @@ static int active_cacheline_dec_overlap(phys_addr_t
> > cln)
> > return active_cacheline_set_overlap(cln, --overlap);
> > }
> >
> > -static int active_cacheline_insert(struct dma_debug_entry *entry)
> > +static int active_cacheline_insert(struct dma_debug_entry *entry,
> > + bool *overlap_cache_clean)
> > {
> > phys_addr_t cln = to_cacheline_number(entry);
> > unsigned long flags;
> > int rc;
> >
> > + *overlap_cache_clean = false;
> > +
> > /* If the device is not writing memory then we don't have any
> > * concerns about the cpu consuming stale data. This mitigates
> > * legitimate usages of overlapping mappings.
> > @@ -487,8 +492,14 @@ static int active_cacheline_insert(struct
> > dma_debug_entry *entry)
> >
> > spin_lock_irqsave(&radix_lock, flags);
> > rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
> > - if (rc == -EEXIST)
> > + if (rc == -EEXIST) {
> > + struct dma_debug_entry *existing;
> > +
> > active_cacheline_inc_overlap(cln);
> > + existing = radix_tree_lookup(&dma_active_cacheline, cln);
> > + if (existing)
> > + *overlap_cache_clean = existing->is_cache_clean;
>
> *nitpick*
>
> IIUC radix_tree_insert() returns -EEXIST only if the key is already
> present in the tree. Since radix_lock is not released between the
> insert attempt and this lookup, I don't see how this lookup could
> possibly fail. If it's not expected to fail, I would add a WARN_ON().
>
> Please, do correct me if I'm missing something.
>
> Other than that, LGTM.
>
> Petr T
Sure, thanks!
> > + }
> > spin_unlock_irqrestore(&radix_lock, flags);
> >
> > return rc;
> > @@ -583,20 +594,24 @@ DEFINE_SHOW_ATTRIBUTE(dump);
> > */
> > static void add_dma_entry(struct dma_debug_entry *entry, unsigned long
> > attrs)
> > {
> > + bool overlap_cache_clean;
> > struct hash_bucket *bucket;
> > unsigned long flags;
> > int rc;
> >
> > + entry->is_cache_clean = !!(attrs & DMA_ATTR_CPU_CACHE_CLEAN);
> > +
> > bucket = get_hash_bucket(entry, &flags);
> > hash_bucket_add(bucket, entry);
> > put_hash_bucket(bucket, flags);
> >
> > - rc = active_cacheline_insert(entry);
> > + rc = active_cacheline_insert(entry, &overlap_cache_clean);
> > if (rc == -ENOMEM) {
> > pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n");
> > global_disable = true;
> > } else if (rc == -EEXIST &&
> > - !(attrs & (DMA_ATTR_SKIP_CPU_SYNC |
> > DMA_ATTR_CPU_CACHE_CLEAN)) &&
> > + !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
> > + !(entry->is_cache_clean && overlap_cache_clean) &&
> > !(IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) &&
> > is_swiotlb_active(entry->dev))) {
> > err_printk(entry->dev, entry,