padata_flush_queues() is broken for an async ->parallel() function
because flush_work() can't wait on it:

  # modprobe tcrypt alg="pcrypt(cryptd(rfc4106(gcm_base(ctr(aes-generic),ghash-generic))))" type=3
  # modprobe tcrypt mode=215 sec=1 &
  # sleep 5; echo 7 > /sys/kernel/pcrypt/pencrypt/parallel_cpumask

  kernel BUG at kernel/padata.c:473!
  invalid opcode: 0000 [#1] SMP PTI
  CPU: 0 PID: 282 Comm: sh Not tainted 5.3.0-rc5-padata-base+ #3
  RIP: 0010:padata_flush_queues+0xa7/0xb0
  Call Trace:
   padata_replace+0xa1/0x110
   padata_set_cpumask+0xae/0x130
   store_cpumask+0x75/0xa0
   padata_sysfs_store+0x20/0x30
   ...

Wait instead for the parallel_data (pd) object's refcount to drop to
zero.

Simplify by taking an initial reference on a pd when allocating an
instance.  That ref is dropped during flushing, which allows calling
wait_for_completion() unconditionally.
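
To see the lifetime pattern in isolation, here is a minimal sketch
(the names are made up for illustration; this is not the padata code
itself):

  #include <linux/atomic.h>
  #include <linux/completion.h>

  /* Illustrative only; obj/obj_* are invented names. */
  struct obj {
          atomic_t          refcnt;  /* starts at 1: the initial ref    */
          struct completion done;    /* completed when refcnt hits zero */
  };

  static void obj_init(struct obj *o)
  {
          atomic_set(&o->refcnt, 1);      /* initial ref, dropped in obj_flush() */
          init_completion(&o->done);
  }

  static void obj_put(struct obj *o)      /* one call per finished object */
  {
          if (atomic_dec_return(&o->refcnt) == 0)
                  complete(&o->done);
  }

  static void obj_flush(struct obj *o)
  {
          obj_put(o);                     /* drop the initial ref */
          wait_for_completion(&o->done);  /* safe even with nothing in flight */
  }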

Without the initial ref, the only alternative I could think of is to
make complete() conditional on !PADATA_INIT or PADATA_REPLACE (in
addition to pd->refcnt reaching zero) and to call
wait_for_completion() only when pd->refcnt is nonzero.  But then
complete() could be called without a matching wait:

completer                     waiter
---------                     ------
DEC pd->refcnt    // 0
                              pinst->flags |= PADATA_REPLACE
LOAD pinst->flags // REPLACE
                              LOAD pd->refcnt // 0
                              /* return without wait_for_completion() */
complete()

Per-CPU work items are no longer flushed, so the CPU hotplug lock is
no longer needed in __padata_stop().

Fixes: 2b73b07ab8a4 ("padata: Flush the padata queues actively")
Reported-by: Herbert Xu <herb...@gondor.apana.org.au>
Suggested-by: Steffen Klassert <steffen.klass...@secunet.com>
Signed-off-by: Daniel Jordan <daniel.m.jor...@oracle.com>
Cc: Sebastian Andrzej Siewior <bige...@linutronix.de>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: linux-crypto@vger.kernel.org
Cc: linux-ker...@vger.kernel.org
---
 include/linux/padata.h |  3 +++
 kernel/padata.c        | 32 ++++++++++++--------------------
 2 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 8da673861d99..1c73f9cc7b72 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -14,6 +14,7 @@
 #include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/kobject.h>
+#include <linux/completion.h>
 
 #define PADATA_CPU_SERIAL   0x01
 #define PADATA_CPU_PARALLEL 0x02
@@ -104,6 +105,7 @@ struct padata_cpumask {
  * @squeue: percpu padata queues used for serialuzation.
  * @reorder_objects: Number of objects waiting in the reorder queues.
  * @refcnt: Number of objects holding a reference on this parallel_data.
+ * @flushing_done: Wait for all objects to be sent out.
  * @max_seq_nr:  Maximal used sequence number.
  * @cpu: Next CPU to be processed.
  * @cpumask: The cpumasks in use for parallel and serial workers.
@@ -116,6 +118,7 @@ struct parallel_data {
        struct padata_serial_queue      __percpu *squeue;
        atomic_t                        reorder_objects;
        atomic_t                        refcnt;
+       struct completion               flushing_done;
        atomic_t                        seq_nr;
        int                             cpu;
        struct padata_cpumask           cpumask;
diff --git a/kernel/padata.c b/kernel/padata.c
index b60cc3dcee58..958166e23123 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -301,7 +301,8 @@ static void padata_serial_worker(struct work_struct *serial_work)
                list_del_init(&padata->list);
 
                padata->serial(padata);
-               atomic_dec(&pd->refcnt);
+               if (atomic_dec_return(&pd->refcnt) == 0)
+                       complete(&pd->flushing_done);
        }
        local_bh_enable();
 }
@@ -423,7 +424,9 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
        padata_init_squeues(pd);
        atomic_set(&pd->seq_nr, -1);
        atomic_set(&pd->reorder_objects, 0);
-       atomic_set(&pd->refcnt, 0);
+       /* Initial ref dropped in padata_flush_queues. */
+       atomic_set(&pd->refcnt, 1);
+       init_completion(&pd->flushing_done);
        pd->pinst = pinst;
        spin_lock_init(&pd->lock);
        pd->cpu = cpumask_first(pd->cpumask.pcpu);
@@ -453,24 +456,15 @@ static void padata_free_pd(struct parallel_data *pd)
 /* Flush all objects out of the padata queues. */
 static void padata_flush_queues(struct parallel_data *pd)
 {
-       int cpu;
-       struct padata_parallel_queue *pqueue;
-       struct padata_serial_queue *squeue;
-
-       for_each_cpu(cpu, pd->cpumask.pcpu) {
-               pqueue = per_cpu_ptr(pd->pqueue, cpu);
-               flush_work(&pqueue->work);
-       }
-
-       if (atomic_read(&pd->reorder_objects))
-               padata_reorder(pd);
+       if (!(pd->pinst->flags & PADATA_INIT))
+               return;
 
-       for_each_cpu(cpu, pd->cpumask.cbcpu) {
-               squeue = per_cpu_ptr(pd->squeue, cpu);
-               flush_work(&squeue->work);
-       }
+       if (atomic_dec_return(&pd->refcnt) == 0)
+               complete(&pd->flushing_done);
 
-       BUG_ON(atomic_read(&pd->refcnt) != 0);
+       wait_for_completion(&pd->flushing_done);
+       reinit_completion(&pd->flushing_done);
+       atomic_set(&pd->refcnt, 1);
 }
 
 static void __padata_start(struct padata_instance *pinst)
@@ -487,9 +481,7 @@ static void __padata_stop(struct padata_instance *pinst)
 
        synchronize_rcu();
 
-       get_online_cpus();
        padata_flush_queues(pinst->pd);
-       put_online_cpus();
 }
 
 /* Replace the internal control structure with a new one. */
-- 
2.23.0
