From: Pasi Sjöholm <[email protected]>

It is possible to get the kernel into a deadlocked state if swap_lock is
not taken with spin_lock_bh(), by calling si_swapinfo() simultaneously
through timer_function and a registered vm shrinker callback function.

BUG: spinlock recursion on CPU#0, main/2447
lock: swap_lock+0x0/0x10, .magic: dead4ead, .owner: main/2447, .owner_cpu: 0
[<c010b938>] (unwind_backtrace+0x0/0x11c) from [<c03e9be0>] (do_raw_spin_lock+0x48/0x154)
[<c03e9be0>] (do_raw_spin_lock+0x48/0x154) from [<c0226e10>] (si_swapinfo+0x10/0x90)
[<c0226e10>] (si_swapinfo+0x10/0x90) from [<c04d7e18>] (timer_function+0x24/0x258)
[<c04d7e18>] (timer_function+0x24/0x258) from [<c0182a10>] (run_timer_softirq+0x27c/0x3c0)
[<c0182a10>] (run_timer_softirq+0x27c/0x3c0) from [<c017bd10>] (__do_softirq+0x12c/0x268)
[<c017bd10>] (__do_softirq+0x12c/0x268) from [<c017c25c>] (irq_exit+0x48/0xa0)
[<c017c25c>] (irq_exit+0x48/0xa0) from [<c01066a4>] (handle_IRQ+0x80/0xc0)
[<c01066a4>] (handle_IRQ+0x80/0xc0) from [<c0100474>] (gic_handle_irq+0x90/0x10c)
[<c0100474>] (gic_handle_irq+0x90/0x10c) from [<c08a9500>] (__irq_svc+0x40/0x70)
Exception stack(0xd3425a58 to 0xd3425aa0)
5a40:                                                       c20f6280 00000040
5a60: 00000053 00000001 c20f6280 00000bc5 c0efb6c8 c10b8be0 000000d4 00000001
5a80: d3425bb4 00000000 00000000 d3425aa0 c0228820 c0228200 20000113 ffffffff
[<c08a9500>] (__irq_svc+0x40/0x70) from [<c0228200>] (scan_swap_map+0x14/0x518)
[<c0228200>] (scan_swap_map+0x14/0x518) from [<c0228820>] (get_swap_page+0x98/0x108)
[<c0228820>] (get_swap_page+0x98/0x108) from [<c0226400>] (add_to_swap+0x20/0x74)
[<c0226400>] (add_to_swap+0x20/0x74) from [<c0208090>] (shrink_page_list+0x234/0x8a0)
[<c0208090>] (shrink_page_list+0x234/0x8a0) from [<c0208b14>] (shrink_inactive_list+0x214/0x4c4)
[<c0208b14>] (shrink_inactive_list+0x214/0x4c4) from [<c020919c>] (shrink_mem_cgroup_zone+0x3d8/0x534)

Signed-off-by: Pasi Sjöholm <[email protected]>
---
 mm/swapfile.c | 54 +++++++++++++++++++++++++++---------------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 63f55cc..b00a55e 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -993,7 +993,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
        if (device)
                bdev = bdget(device);
 
-       spin_lock(&swap_lock);
+       spin_lock_bh(&swap_lock);
        for (type = 0; type < nr_swapfiles; type++) {
                struct swap_info_struct *sis = swap_info[type];
 
@@ -1004,7 +1004,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
                        if (bdev_p)
                                *bdev_p = bdgrab(sis->bdev);
 
-                       spin_unlock(&swap_lock);
+                       spin_unlock_bh(&swap_lock);
                        return type;
                }
                if (bdev == sis->bdev) {
@@ -1014,13 +1014,13 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
                                if (bdev_p)
                                        *bdev_p = bdgrab(sis->bdev);
 
-                               spin_unlock(&swap_lock);
+                               spin_unlock_bh(&swap_lock);
                                bdput(bdev);
                                return type;
                        }
                }
        }
-       spin_unlock(&swap_lock);
+       spin_unlock_bh(&swap_lock);
        if (bdev)
                bdput(bdev);
 
@@ -1052,7 +1052,7 @@ unsigned int count_swap_pages(int type, int free)
 {
        unsigned int n = 0;
 
-       spin_lock(&swap_lock);
+       spin_lock_bh(&swap_lock);
        if ((unsigned int)type < nr_swapfiles) {
                struct swap_info_struct *sis = swap_info[type];
 
@@ -1064,7 +1064,7 @@ unsigned int count_swap_pages(int type, int free)
                }
                spin_unlock(&sis->lock);
        }
-       spin_unlock(&swap_lock);
+       spin_unlock_bh(&swap_lock);
        return n;
 }
 #endif /* CONFIG_HIBERNATION */
@@ -1783,20 +1783,20 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
                                unsigned long *frontswap_map)
 {
        frontswap_init(p->type, frontswap_map);
-       spin_lock(&swap_lock);
+       spin_lock_bh(&swap_lock);
        spin_lock(&p->lock);
         _enable_swap_info(p, prio, swap_map, cluster_info);
        spin_unlock(&p->lock);
-       spin_unlock(&swap_lock);
+       spin_unlock_bh(&swap_lock);
 }
 
 static void reinsert_swap_info(struct swap_info_struct *p)
 {
-       spin_lock(&swap_lock);
+       spin_lock_bh(&swap_lock);
        spin_lock(&p->lock);
        _enable_swap_info(p, p->prio, p->swap_map, p->cluster_info);
        spin_unlock(&p->lock);
-       spin_unlock(&swap_lock);
+       spin_unlock_bh(&swap_lock);
 }
 
 SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
@@ -1827,7 +1827,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                goto out;
 
        mapping = victim->f_mapping;
-       spin_lock(&swap_lock);
+       spin_lock_bh(&swap_lock);
        plist_for_each_entry(p, &swap_active_head, list) {
                if (p->flags & SWP_WRITEOK) {
                        if (p->swap_file->f_mapping == mapping) {
@@ -1838,14 +1838,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        }
        if (!found) {
                err = -EINVAL;
-               spin_unlock(&swap_lock);
+               spin_unlock_bh(&swap_lock);
                goto out_dput;
        }
        if (!security_vm_enough_memory_mm(current->mm, p->pages))
                vm_unacct_memory(p->pages);
        else {
                err = -ENOMEM;
-               spin_unlock(&swap_lock);
+               spin_unlock_bh(&swap_lock);
                goto out_dput;
        }
        spin_lock(&swap_avail_lock);
@@ -1867,7 +1867,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        total_swap_pages -= p->pages;
        p->flags &= ~SWP_WRITEOK;
        spin_unlock(&p->lock);
-       spin_unlock(&swap_lock);
+       spin_unlock_bh(&swap_lock);
 
        set_current_oom_origin();
        err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
@@ -1886,7 +1886,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                free_swap_count_continuations(p);
 
        mutex_lock(&swapon_mutex);
-       spin_lock(&swap_lock);
+       spin_lock_bh(&swap_lock);
        spin_lock(&p->lock);
        drain_mmlist();
 
@@ -1894,9 +1894,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        p->highest_bit = 0;             /* cuts scans short */
        while (p->flags >= SWP_SCANNING) {
                spin_unlock(&p->lock);
-               spin_unlock(&swap_lock);
+               spin_unlock_bh(&swap_lock);
                schedule_timeout_uninterruptible(1);
-               spin_lock(&swap_lock);
+               spin_lock_bh(&swap_lock);
                spin_lock(&p->lock);
        }
 
@@ -1910,7 +1910,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        p->cluster_info = NULL;
        frontswap_map = frontswap_map_get(p);
        spin_unlock(&p->lock);
-       spin_unlock(&swap_lock);
+       spin_unlock_bh(&swap_lock);
        frontswap_invalidate_area(p->type);
        frontswap_map_set(p, NULL);
        mutex_unlock(&swapon_mutex);
@@ -1939,9 +1939,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
         * can reuse this swap_info in alloc_swap_info() safely.  It is ok to
         * not hold p->lock after we cleared its SWP_WRITEOK.
         */
-       spin_lock(&swap_lock);
+       spin_lock_bh(&swap_lock);
        p->flags = 0;
-       spin_unlock(&swap_lock);
+       spin_unlock_bh(&swap_lock);
 
        err = 0;
        atomic_inc(&proc_poll_event);
@@ -2098,13 +2098,13 @@ static struct swap_info_struct *alloc_swap_info(void)
        if (!p)
                return ERR_PTR(-ENOMEM);
 
-       spin_lock(&swap_lock);
+       spin_lock_bh(&swap_lock);
        for (type = 0; type < nr_swapfiles; type++) {
                if (!(swap_info[type]->flags & SWP_USED))
                        break;
        }
        if (type >= MAX_SWAPFILES) {
-               spin_unlock(&swap_lock);
+               spin_unlock_bh(&swap_lock);
                kfree(p);
                return ERR_PTR(-EPERM);
        }
@@ -2130,7 +2130,7 @@ static struct swap_info_struct *alloc_swap_info(void)
        plist_node_init(&p->list, 0);
        plist_node_init(&p->avail_list, 0);
        p->flags = SWP_USED;
-       spin_unlock(&swap_lock);
+       spin_unlock_bh(&swap_lock);
        spin_lock_init(&p->lock);
 
        return p;
@@ -2536,10 +2536,10 @@ bad_swap:
        }
        destroy_swap_extents(p);
        swap_cgroup_swapoff(p->type);
-       spin_lock(&swap_lock);
+       spin_lock_bh(&swap_lock);
        p->swap_file = NULL;
        p->flags = 0;
-       spin_unlock(&swap_lock);
+       spin_unlock_bh(&swap_lock);
        vfree(swap_map);
        vfree(cluster_info);
        if (swap_file) {
@@ -2566,7 +2566,7 @@ void si_swapinfo(struct sysinfo *val)
        unsigned int type;
        unsigned long nr_to_be_unused = 0;
 
-       spin_lock(&swap_lock);
+       spin_lock_bh(&swap_lock);
        for (type = 0; type < nr_swapfiles; type++) {
                struct swap_info_struct *si = swap_info[type];
 
@@ -2575,7 +2575,7 @@ void si_swapinfo(struct sysinfo *val)
        }
        val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused;
        val->totalswap = total_swap_pages + nr_to_be_unused;
-       spin_unlock(&swap_lock);
+       spin_unlock_bh(&swap_lock);
 }
 
 /*
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to