On Thu, 19 Mar 2026 18:23:24 +0000 "Lorenzo Stoakes (Oracle)" <[email protected]>
wrote:
> This series expands the mmap_prepare functionality, which is intended to
> replace the deprecated f_op->mmap hook which has been the source of bugs
> and security issues for some time.
>
Thanks, I updated mm.git's mm-new branch to this version.
>
> v3:
> * Propagated tags (thanks Suren, Richard!)
> * Updated 12/16 to correctly clear the vm_area_desc data structure in
> set_desc_from_vma() as per Joshua Hahn (thanks! :)
> * Fixed type in 12/16 as per Suren (cheers!)
> * Fixed up 6/16 to use mmap_action_ioremap_full() in simple_ioremap_prepare()
> as
> suggested by Suren.
> * Also fixed up 6/16 to call io_remap_pfn_range_prepare() direct rather than
> mmap_action_prepare() as per Suren.
> * Also fixed up 6/16 to pass vm_len rather than vm_[start, end] to
> __simple_ioremap_prep() as per Suren (thanks for all the above! :)
> * Fixed issue in rmap lock being held - we were referencing a vma->vm_file
> after
> the VMA was unmapped, so UAF. Avoid that. Also do_munmap() relies on rmap
> lock
> NOT being held or may deadlock, so extend functionality to ensure we drop it
> when it is held on error paths.
> * Updated 'area' -> 'vma' variable in 3/16 in VMA test dup.h.
> * Fixed up reference to __compat_vma_mmap() in 12/16 commit message.
> * Updated 1/16 to no longer duplicatively apply io_remap_pfn_range_pfn().
> * Updated 1/16 to delegate I/O remap complete to remap complete logic.
> * Fixed various typos in 12/16.
> * Fixed stale comment typos in 13/16.
> * Fixed commit msg and comment typos in 14/16.
> * Removed accidental sneak peek at future functionality in 15/16 commit
> message
> :).
> * Fixed up field names to be identical in VMA tests + mm_types.h in 6/16,
> 15/16.
Here's how v3 altered mm.git:
drivers/hv/vmbus_drv.c | 6 +-
drivers/target/target_core_user.c | 2
include/linux/mm.h | 3 -
include/linux/uio_driver.h | 2
mm/internal.h | 22 ++++-----
mm/memory.c | 16 +++----
mm/util.c | 62 ++++++++++++++--------------
mm/vma.c | 24 +++-------
tools/testing/vma/include/dup.h | 19 ++++----
tools/testing/vma/include/stubs.h | 3 -
10 files changed, 77 insertions(+), 82 deletions(-)
--- a/drivers/hv/vmbus_drv.c~b
+++ a/drivers/hv/vmbus_drv.c
@@ -1955,8 +1955,8 @@ static int hv_mmap_ring_buffer_wrapper(s
int err;
/*
- * hv_(create|remove)_ring_sysfs implementation ensures that
mmap_ring_buffer
- * is not NULL.
+ * hv_(create|remove)_ring_sysfs implementation ensures that
+ * mmap_prepare_ring_buffer is not NULL.
*/
compat_set_desc_from_vma(&desc, filp, vma);
err = channel->mmap_prepare_ring_buffer(channel, &desc);
@@ -2055,7 +2055,7 @@ static const struct kobj_type vmbus_chan
/**
* hv_create_ring_sysfs() - create "ring" sysfs entry corresponding to ring
buffers for a channel.
* @channel: Pointer to vmbus_channel structure
- * @hv_mmap_ring_buffer: function pointer for initializing the function to be
called on mmap
+ * @hv_mmap_prepare_ring_buffer: function pointer for initializing the
function to be called on mmap
* channel's "ring" sysfs node, which is for the ring
buffer of that channel.
* Function pointer is of below type:
* int (*hv_mmap_prepare_ring_buffer)(struct
vmbus_channel *channel,
--- a/drivers/target/target_core_user.c~b
+++ a/drivers/target/target_core_user.c
@@ -1865,7 +1865,7 @@ static int tcmu_vma_mapped(unsigned long
{
struct tcmu_dev *udev = *vm_private_data;
- pr_debug("vma_open\n");
+ pr_debug("vma_mapped\n");
kref_get(&udev->kref);
return 0;
--- a/include/linux/mm.h~b
+++ a/include/linux/mm.h
@@ -4405,7 +4405,8 @@ static inline void mmap_action_map_kerne
int mmap_action_prepare(struct vm_area_desc *desc);
int mmap_action_complete(struct vm_area_struct *vma,
- struct mmap_action *action);
+ struct mmap_action *action,
+ bool rmap_lock_held);
/* Look up the first VMA which exactly match the interval vm_start ... vm_end
*/
static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
--- a/include/linux/uio_driver.h~b
+++ a/include/linux/uio_driver.h
@@ -97,7 +97,7 @@ struct uio_device {
* @irq_flags: flags for request_irq()
* @priv: optional private data
* @handler: the device's irq handler
- * @mmap_prepare: mmap_pepare operation for this uio device
+ * @mmap_prepare: mmap_prepare operation for this uio device
* @open: open operation for this uio device
* @release: release operation for this uio device
* @irqcontrol: disable/enable irqs when 0/1 is written to
/dev/uioX
--- a/mm/internal.h~b
+++ a/mm/internal.h
@@ -1805,29 +1805,25 @@ int remap_pfn_range_prepare(struct vm_ar
int remap_pfn_range_complete(struct vm_area_struct *vma,
struct mmap_action *action);
int simple_ioremap_prepare(struct vm_area_desc *desc);
-/* No simple_ioremap_complete, is ultimately handled by remap complete. */
static inline int io_remap_pfn_range_prepare(struct vm_area_desc *desc)
{
struct mmap_action *action = &desc->action;
const unsigned long orig_pfn = action->remap.start_pfn;
+ const pgprot_t orig_pgprot = action->remap.pgprot;
const unsigned long size = action->remap.size;
const unsigned long pfn = io_remap_pfn_range_pfn(orig_pfn, size);
+ int err;
action->remap.start_pfn = pfn;
- return remap_pfn_range_prepare(desc);
-}
-
-static inline int io_remap_pfn_range_complete(struct vm_area_struct *vma,
- struct mmap_action *action)
-{
- const unsigned long size = action->remap.size;
- const unsigned long orig_pfn = action->remap.start_pfn;
- const pgprot_t orig_prot = vma->vm_page_prot;
+ action->remap.pgprot = pgprot_decrypted(orig_pgprot);
+ err = remap_pfn_range_prepare(desc);
+ if (err)
+ return err;
- action->remap.pgprot = pgprot_decrypted(orig_prot);
- action->remap.start_pfn = io_remap_pfn_range_pfn(orig_pfn, size);
- return remap_pfn_range_complete(vma, action);
+ /* Remap does the actual work. */
+ action->type = MMAP_REMAP_PFN;
+ return 0;
}
#ifdef CONFIG_MMU_NOTIFIER
--- a/mm/memory.c~b
+++ a/mm/memory.c
@@ -3207,11 +3207,10 @@ int remap_pfn_range_complete(struct vm_a
return do_remap_pfn_range(vma, start, pfn, size, prot);
}
-static int __simple_ioremap_prep(unsigned long vm_start, unsigned long vm_end,
- pgoff_t vm_pgoff, phys_addr_t start_phys,
- unsigned long size, unsigned long *pfnp)
+static int __simple_ioremap_prep(unsigned long vm_len, pgoff_t vm_pgoff,
+ phys_addr_t start_phys, unsigned long size,
+ unsigned long *pfnp)
{
- const unsigned long vm_len = vm_end - vm_start;
unsigned long pfn, pages;
/* Check that the physical memory area passed in looks valid */
@@ -3250,14 +3249,14 @@ int simple_ioremap_prepare(struct vm_are
unsigned long pfn;
int err;
- err = __simple_ioremap_prep(desc->start, desc->end, desc->pgoff,
+ err = __simple_ioremap_prep(vma_desc_size(desc), desc->pgoff,
start, size, &pfn);
if (err)
return err;
/* The I/O remap logic does the heavy lifting. */
- mmap_action_ioremap(desc, desc->start, pfn, vma_desc_size(desc));
- return mmap_action_prepare(desc);
+ mmap_action_ioremap_full(desc, pfn);
+ return io_remap_pfn_range_prepare(desc);
}
/**
@@ -3283,8 +3282,7 @@ int vm_iomap_memory(struct vm_area_struc
unsigned long pfn;
int err;
- err = __simple_ioremap_prep(vm_start, vm_end, vma->vm_pgoff, start,
- len, &pfn);
+ err = __simple_ioremap_prep(vm_len, vma->vm_pgoff, start, len, &pfn);
if (err)
return err;
--- a/mm/util.c~b
+++ a/mm/util.c
@@ -1181,6 +1181,8 @@ void compat_set_desc_from_vma(struct vm_
const struct file *file,
const struct vm_area_struct *vma)
{
+ memset(desc, 0, sizeof(*desc));
+
desc->mm = vma->vm_mm;
desc->file = (struct file *)file;
desc->start = vma->vm_start;
@@ -1224,7 +1226,7 @@ static int __compat_vma_mapped(struct fi
* @vma: The VMA to which @desc should be applied.
*
* The function assumes that you have obtained a VMA descriptor @desc from
- * compt_set_desc_from_vma(), and already executed the mmap_prepare() hook upon
+ * compat_set_desc_from_vma(), and already executed the mmap_prepare() hook
upon
* it.
*
* It then performs any specified mmap actions, and invokes the
vm_ops->mapped()
@@ -1249,7 +1251,8 @@ int __compat_vma_mmap(struct vm_area_des
/* Update the VMA from the descriptor. */
compat_set_vma_from_desc(vma, desc);
/* Complete any specified mmap actions. */
- err = mmap_action_complete(vma, &desc->action);
+ err = mmap_action_complete(vma, &desc->action,
+ /*rmap_lock_held=*/false);
if (err)
return err;
@@ -1397,29 +1400,31 @@ again:
}
static int mmap_action_finish(struct vm_area_struct *vma,
- struct mmap_action *action, int err)
+ struct mmap_action *action, int err,
+ bool rmap_lock_held)
{
+ if (rmap_lock_held)
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+
+ if (!err) {
+ if (action->success_hook)
+ return action->success_hook(vma);
+ return 0;
+ }
+
/*
* If an error occurs, unmap the VMA altogether and return an error. We
* only clear the newly allocated VMA, since this function is only
* invoked if we do NOT merge, so we only clean up the VMA we created.
*/
- if (err) {
- unmap_vma_locked(vma);
- if (action->error_hook) {
- /* We may want to filter the error. */
- err = action->error_hook(err);
-
- /* The caller should not clear the error. */
- VM_WARN_ON_ONCE(!err);
- }
- return err;
+ unmap_vma_locked(vma);
+ if (action->error_hook) {
+ /* We may want to filter the error. */
+ err = action->error_hook(err);
+ /* The caller should not clear the error. */
+ VM_WARN_ON_ONCE(!err);
}
-
- if (action->success_hook)
- return action->success_hook(vma);
-
- return 0;
+ return err;
}
#ifdef CONFIG_MMU
@@ -1454,13 +1459,15 @@ EXPORT_SYMBOL(mmap_action_prepare);
* mmap_action_complete - Execute VMA descriptor action.
* @vma: The VMA to perform the action upon.
* @action: The action to perform.
+ * @rmap_lock_held: Is the file rmap lock held?
*
* Similar to mmap_action_prepare().
*
* Return: 0 on success, or error, at which point the VMA will be unmapped.
*/
int mmap_action_complete(struct vm_area_struct *vma,
- struct mmap_action *action)
+ struct mmap_action *action,
+ bool rmap_lock_held)
{
int err = 0;
@@ -1471,23 +1478,19 @@ int mmap_action_complete(struct vm_area_
case MMAP_REMAP_PFN:
err = remap_pfn_range_complete(vma, action);
break;
- case MMAP_IO_REMAP_PFN:
- err = io_remap_pfn_range_complete(vma, action);
- break;
case MMAP_MAP_KERNEL_PAGES:
err = map_kernel_pages_complete(vma, action);
break;
+ case MMAP_IO_REMAP_PFN:
case MMAP_SIMPLE_IO_REMAP:
- /*
- * The simple I/O remap should have been delegated to an I/O
- * remap.
- */
+ /* Should have been delegated. */
WARN_ON_ONCE(1);
err = -EINVAL;
break;
}
- return mmap_action_finish(vma, action, err);
+ return mmap_action_finish(vma, action, err,
+ rmap_lock_held);
}
EXPORT_SYMBOL(mmap_action_complete);
#else
@@ -1509,7 +1512,8 @@ int mmap_action_prepare(struct vm_area_d
EXPORT_SYMBOL(mmap_action_prepare);
int mmap_action_complete(struct vm_area_struct *vma,
- struct mmap_action *action)
+ struct mmap_action *action,
+ bool rmap_lock_held)
{
int err = 0;
@@ -1525,7 +1529,7 @@ int mmap_action_complete(struct vm_area_
break;
}
- return mmap_action_finish(vma, action, err);
+ return mmap_action_finish(vma, action, err, rmap_lock_held);
}
EXPORT_SYMBOL(mmap_action_complete);
#endif
--- a/mm/vma.c~b
+++ a/mm/vma.c
@@ -2732,7 +2732,8 @@ static bool can_set_ksm_flags_early(stru
return false;
}
-static int call_mapped_hook(struct vm_area_struct *vma)
+static int call_mapped_hook(struct mmap_state *map,
+ struct vm_area_struct *vma)
{
const struct vm_operations_struct *vm_ops = vma->vm_ops;
void *vm_private_data = vma->vm_private_data;
@@ -2743,6 +2744,9 @@ static int call_mapped_hook(struct vm_ar
err = vm_ops->mapped(vma->vm_start, vma->vm_end, vma->vm_pgoff,
vma->vm_file, &vm_private_data);
if (err) {
+ if (map->hold_file_rmap_lock)
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+
unmap_vma_locked(vma);
return err;
}
@@ -2752,17 +2756,6 @@ static int call_mapped_hook(struct vm_ar
return 0;
}
-static void maybe_drop_file_rmap_lock(struct mmap_state *map,
- struct vm_area_struct *vma)
-{
- struct file *file;
-
- if (!map->hold_file_rmap_lock)
- return;
- file = vma->vm_file;
- i_mmap_unlock_write(file->f_mapping);
-}
-
static unsigned long __mmap_region(struct file *file, unsigned long addr,
unsigned long len, vma_flags_t vma_flags,
unsigned long pgoff, struct list_head *uf)
@@ -2814,11 +2807,10 @@ static unsigned long __mmap_region(struc
__mmap_complete(&map, vma);
if (have_mmap_prepare && allocated_new) {
- error = mmap_action_complete(vma, &desc.action);
+ error = mmap_action_complete(vma, &desc.action,
+ map.hold_file_rmap_lock);
if (!error)
- error = call_mapped_hook(vma);
-
- maybe_drop_file_rmap_lock(&map, vma);
+ error = call_mapped_hook(&map, vma);
if (error)
return error;
}
--- a/tools/testing/vma/include/dup.h~b
+++ a/tools/testing/vma/include/dup.h
@@ -470,13 +470,13 @@ struct mmap_action {
pgprot_t pgprot;
} remap;
struct {
- phys_addr_t start;
- unsigned long len;
+ phys_addr_t start_phys_addr;
+ unsigned long size;
} simple_ioremap;
struct {
unsigned long start;
struct page **pages;
- unsigned long num;
+ unsigned long nr_pages;
pgoff_t pgoff;
} map_kernel;
};
@@ -648,7 +648,7 @@ struct vm_operations_struct {
* upon first mapping a VMA.
* Context: User context. May sleep. Caller holds mmap_lock.
*/
- void (*open)(struct vm_area_struct * area);
+ void (*open)(struct vm_area_struct *vma);
/**
* @close: Called when the VMA is being removed from the MM.
* Context: User context. May sleep. Caller holds mmap_lock.
@@ -672,8 +672,8 @@ struct vm_operations_struct {
int (*mapped)(unsigned long start, unsigned long end, pgoff_t pgoff,
const struct file *file, void **vm_private_data);
/* Called any time before splitting to check if it's allowed */
- int (*may_split)(struct vm_area_struct *area, unsigned long addr);
- int (*mremap)(struct vm_area_struct *area);
+ int (*may_split)(struct vm_area_struct *vma, unsigned long addr);
+ int (*mremap)(struct vm_area_struct *vma);
/*
* Called by mprotect() to make driver-specific permission
* checks before mprotect() is finalised. The VMA must not
@@ -685,7 +685,7 @@ struct vm_operations_struct {
vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
vm_fault_t (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
- unsigned long (*pagesize)(struct vm_area_struct * area);
+ unsigned long (*pagesize)(struct vm_area_struct *vma);
/* notification that a previously read-only page is about to become
* writable, if an error is returned it will cause a SIGBUS */
@@ -1288,6 +1288,8 @@ static inline void compat_set_desc_from_
const struct file *file,
const struct vm_area_struct *vma)
{
+ memset(desc, 0, sizeof(*desc));
+
desc->mm = vma->vm_mm;
desc->file = (struct file *)file;
desc->start = vma->vm_start;
@@ -1342,7 +1344,8 @@ static inline int __compat_vma_mmap(stru
/* Update the VMA from the descriptor. */
compat_set_vma_from_desc(vma, desc);
/* Complete any specified mmap actions. */
- err = mmap_action_complete(vma, &desc->action);
+ err = mmap_action_complete(vma, &desc->action,
+ /*rmap_lock_held=*/false);
if (err)
return err;
--- a/tools/testing/vma/include/stubs.h~b
+++ a/tools/testing/vma/include/stubs.h
@@ -87,7 +87,8 @@ static inline int mmap_action_prepare(st
}
static inline int mmap_action_complete(struct vm_area_struct *vma,
- struct mmap_action *action)
+ struct mmap_action *action,
+ bool rmap_lock_held)
{
return 0;
}
_