From: Lihao Liang <[email protected]>

Signed-off-by: Lihao Liang <[email protected]>
---
 include/linux/prcu.h |  73 ++++++++++++++++-----
 kernel/rcu/prcu.c    | 178 +++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 225 insertions(+), 26 deletions(-)

diff --git a/include/linux/prcu.h b/include/linux/prcu.h
index bb20fa40..9f740985 100644
--- a/include/linux/prcu.h
+++ b/include/linux/prcu.h
@@ -1,3 +1,11 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (PRCU version).
+ * PRCU public definitions.
+ *
+ * Authors: Heng Zhang <[email protected]>
+ *          Lihao Liang <[email protected]>
+ */
+
 #ifndef __LINUX_PRCU_H
 #define __LINUX_PRCU_H
 
@@ -8,12 +16,26 @@
 #include <linux/completion.h>
 
 #ifdef CONFIG_PRCU
+
+/*
+ * Simple list structure of callback versions.
+ *
+ * Note: Ideally, we would like to add the version field
+ * to the rcu_head struct.  But if we do so, other users of
+ * rcu_head in the Linux kernel will complain hard and loudly.
+ */
 struct prcu_version_head {
        unsigned long long version;
        struct prcu_version_head *next;
 };
 
-/* Simple unsegmented callback list for PRCU. */
+/*
+ * Simple unsegmented callback list for PRCU.
+ *
+ * Note: Since we can't add a new version field to rcu_head,
+ * we have to make our own callback list for PRCU instead of
+ * using the existing rcu_cblist. Sigh!
+ */
 struct prcu_cblist {
        struct rcu_head *head;
        struct rcu_head **tail;
@@ -27,31 +49,47 @@ struct prcu_cblist {
        .version_head = NULL, .version_tail = &n.version_head, \
 }
 
+/*
+ * PRCU's per-CPU state.
+ */
 struct prcu_local_struct {
-       unsigned int locked;
-       unsigned int online;
-       unsigned long long version;
-       unsigned long long cb_version;
-       struct rcu_head barrier_head;
-       struct prcu_cblist cblist;
+       unsigned int locked;           /* Nesting level of PRCU read-side */
+                                      /*  critical sections */
+       unsigned int online;           /* Indicates whether a context-switch */
+                                      /*  has occurred on this CPU */
+       unsigned long long version;    /* Local grace-period version */
+       unsigned long long cb_version; /* Local callback version */
+       struct rcu_head barrier_head;  /* Used by prcu_barrier() */
+       struct prcu_cblist cblist;     /* PRCU callback and version list */
 };
 
+/*
+ * PRCU's global state.
+ */
 struct prcu_struct {
-       atomic64_t global_version;
-       atomic64_t cb_version;
-       atomic_t active_ctr;
-       atomic_t barrier_cpu_count;
-       struct mutex mtx;
-       struct mutex barrier_mtx;
-       wait_queue_head_t wait_q;
-       struct completion barrier_completion;
+       atomic64_t global_version;            /* Global grace-period version */
+       atomic64_t cb_version;                /* Global callback version */
+       atomic_t active_ctr;                  /* Outstanding PRCU tasks */
+                                             /*  being context-switched */
+       atomic_t barrier_cpu_count;           /* # CPUs waiting on prcu_barrier() */
+       struct mutex mtx;                     /* Serialize synchronize_prcu() */
+       struct mutex barrier_mtx;             /* Serialize prcu_barrier() */
+       wait_queue_head_t wait_q;             /* Wait for synchronize_prcu() */
+       struct completion barrier_completion; /* Wait for prcu_barrier() */
 };
 
+/*
+ * PRCU APIs.
+ */
 void prcu_read_lock(void);
 void prcu_read_unlock(void);
 void synchronize_prcu(void);
 void call_prcu(struct rcu_head *head, rcu_callback_t func);
 void prcu_barrier(void);
+
+/*
+ * Internal non-public functions.
+ */
 void prcu_init(void);
 void prcu_note_context_switch(void);
 int prcu_pending(void);
@@ -60,11 +98,16 @@ void prcu_check_callbacks(void);
 
 #else /* #ifdef CONFIG_PRCU */
 
+/*
+ * If CONFIG_PRCU is not defined,
+ * map its APIs to RCU's counterparts.
+ */
 #define prcu_read_lock rcu_read_lock
 #define prcu_read_unlock rcu_read_unlock
 #define synchronize_prcu synchronize_rcu
 #define call_prcu call_rcu
 #define prcu_barrier rcu_barrier
+
 #define prcu_init() do {} while (0)
 #define prcu_note_context_switch() do {} while (0)
 #define prcu_pending() 0
diff --git a/kernel/rcu/prcu.c b/kernel/rcu/prcu.c
index 49cb70e6..ef2c7730 100644
--- a/kernel/rcu/prcu.c
+++ b/kernel/rcu/prcu.c
@@ -1,3 +1,17 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (PRCU version).
+ * This PRCU implementation is based on a fast consensus protocol
+ * published in the following paper:
+ *
+ * Fast Consensus Using Bounded Staleness for Scalable Read-mostly Synchronization.
+ * Haibo Chen, Heng Zhang, Ran Liu, Binyu Zang, and Haibing Guan.
+ * IEEE Transactions on Parallel and Distributed Systems (TPDS), 2016.
+ * https://dl.acm.org/citation.cfm?id=3024114.3024143
+ *
+ * Authors: Heng Zhang <[email protected]>
+ *          Lihao Liang <[email protected]>
+ */
+
 #include <linux/smp.h>
 #include <linux/percpu.h>
 #include <linux/prcu.h>
@@ -8,8 +22,16 @@
 
 #include "rcu.h"
 
+/* Data structures. */
+
+/*
+ * Initialize PRCU's per-CPU local structure.
+ */
 DEFINE_PER_CPU_SHARED_ALIGNED(struct prcu_local_struct, prcu_local);
 
+/*
+ * Initialize PRCU's global structure.
+ */
 struct prcu_struct global_prcu = {
        .global_version = ATOMIC64_INIT(0),
        .cb_version = ATOMIC64_INIT(0),
@@ -20,7 +42,9 @@ struct prcu_struct global_prcu = {
 };
 struct prcu_struct *prcu = &global_prcu;
 
-/* Initialize simple callback list. */
+/*
+ * Initialize simple PRCU callback list.
+ */
 static void prcu_cblist_init(struct prcu_cblist *rclp)
 {
        rclp->head = NULL;
@@ -31,8 +55,8 @@ static void prcu_cblist_init(struct prcu_cblist *rclp)
 }
 
 /*
- * Dequeue the oldest rcu_head structure from the specified callback list;
- * store the callback grace period version number into the version pointer.
+ * Dequeue the oldest rcu_head structure from the specified callback list.
+ * Store the callback version number into the version pointer.
  */
 static struct rcu_head *prcu_cblist_dequeue(struct prcu_cblist *rclp)
 {
@@ -59,6 +83,11 @@ static struct rcu_head *prcu_cblist_dequeue(struct prcu_cblist *rclp)
        return rhp;
 }
 
+/* PRCU function implementations. */
+
+/*
+ * Update local PRCU state of the current CPU.
+ */
 static inline void prcu_report(struct prcu_local_struct *local)
 {
        unsigned long long global_version;
@@ -70,6 +99,15 @@ static inline void prcu_report(struct prcu_local_struct *local)
                cmpxchg(&local->version, local_version, global_version);
 }
 
+/*
+ * Mark the beginning of a PRCU read-side critical section.
+ *
+ * A PRCU quiescent state of a CPU is when its local ->locked and
+ * ->online variables become 0.
+ *
+ * See prcu_read_unlock() and synchronize_prcu() for more information.
+ * Also see rcu_read_lock() comment header.
+ */
 void prcu_read_lock(void)
 {
        struct prcu_local_struct *local;
@@ -77,29 +115,50 @@ void prcu_read_lock(void)
        local = get_cpu_ptr(&prcu_local);
        if (!local->online) {
                WRITE_ONCE(local->online, 1);
+               /*
+                * Memory barrier is needed for PRCU writers
+                * to see the updated local->online value.
+                */
                smp_mb();
        }
-
        local->locked++;
+       /*
+        * Critical section after entry code.
+        * put_cpu_ptr() provides the needed barrier().
+        */
        put_cpu_ptr(&prcu_local);
 }
 EXPORT_SYMBOL(prcu_read_lock);
 
+/*
+ * Mark the end of a PRCU read-side critical section.
+ *
+ * See prcu_read_lock() and synchronize_prcu() for more information.
+ * Also see rcu_read_unlock() comment header.
+ */
 void prcu_read_unlock(void)
 {
        int locked;
        struct prcu_local_struct *local;
 
-       barrier();
+       barrier(); /* Critical section before exit code. */
        local = get_cpu_ptr(&prcu_local);
        locked = local->locked;
        if (locked) {
                local->locked--;
+               /*
+                * If we are executing the last PRCU task,
+                * update the CPU-local PRCU state.
+                */
                if (locked == 1)
                        prcu_report(local);
                put_cpu_ptr(&prcu_local);
        } else {
                put_cpu_ptr(&prcu_local);
+               /*
+                * If we are executing the last outstanding
+                * PRCU task, wake up synchronize_prcu().
+                */
                if (!atomic_dec_return(&prcu->active_ctr))
                        wake_up(&prcu->wait_q);
        }
@@ -111,10 +170,25 @@ static void prcu_handler(void *info)
        struct prcu_local_struct *local;
 
        local = this_cpu_ptr(&prcu_local);
+       /*
+        * We need to do this check locally on the current CPU
+        * because no memory barrier is used for ->locked so
+        * PRCU writers may not see its latest local value.
+        */
        if (!local->locked)
                WRITE_ONCE(local->version, atomic64_read(&prcu->global_version));
 }
 
+/*
+ * Wait until a grace period has completed.
+ *
+ * A PRCU grace period can end if each CPU has passed a PRCU quiescent state
+ * -and- the global variable ->active_ctr is 0, that is all pre-existing
+ * PRCU read-side critical sections have completed.
+ *
+ * See prcu_read_lock() and prcu_read_unlock() for more information.
+ * Also see synchronize_rcu() comment header.
+ */
 void synchronize_prcu(void)
 {
        int cpu;
@@ -122,7 +196,13 @@ void synchronize_prcu(void)
        unsigned long long version;
        struct prcu_local_struct *local;
 
+       /*
+        * Get the new global grace-period version before taking mutex,
+        * which allows multiple synchronize_prcu() calls overlapping
+        * with PRCU readers to return in a timely fashion.
+        */
        version = atomic64_add_return(1, &prcu->global_version);
+       /* Take mutex to serialize concurrent synchronize_prcu() calls. */
        mutex_lock(&prcu->mtx);
 
        local = get_cpu_ptr(&prcu_local);
@@ -130,8 +210,14 @@ void synchronize_prcu(void)
        put_cpu_ptr(&prcu_local);
 
        cpumask_clear(&cpus);
+       /* Send an IPI to force straggling CPUs to update their PRCU state. */
        for_each_possible_cpu(cpu) {
                local = per_cpu_ptr(&prcu_local, cpu);
+               /*
+                * If no PRCU tasks are currently running on this CPU
+                * or a context-switch has occurred, the CPU-local PRCU
+                * state has already been updated.
+                */
                if (!READ_ONCE(local->online))
                        continue;
                if (READ_ONCE(local->version) < version) {
@@ -140,34 +226,46 @@ void synchronize_prcu(void)
                }
        }
 
+       /* Wait for outstanding CPUs to commit. */
        for_each_cpu(cpu, &cpus) {
                local = per_cpu_ptr(&prcu_local, cpu);
                while (READ_ONCE(local->version) < version)
                        cpu_relax();
        }
 
+       /* Wait for outstanding PRCU tasks to finish. */
        if (atomic_read(&prcu->active_ctr))
                wait_event(prcu->wait_q, !atomic_read(&prcu->active_ctr));
-
+       /* Update the global callback version to its grace-period version. */
        atomic64_set(&prcu->cb_version, version);
        mutex_unlock(&prcu->mtx);
 }
 EXPORT_SYMBOL(synchronize_prcu);
 
+/*
+ * Update PRCU state when a context-switch occurs.
+ */
 void prcu_note_context_switch(void)
 {
        struct prcu_local_struct *local;
 
        local = get_cpu_ptr(&prcu_local);
+       /* Update local and global outstanding PRCU task number. */
        if (local->locked) {
                atomic_add(local->locked, &prcu->active_ctr);
                local->locked = 0;
        }
+       /* Indicate a context-switch has occurred on this CPU. */
        local->online = 0;
+       /* Update this CPU's local PRCU state. */
        prcu_report(local);
        put_cpu_ptr(&prcu_local);
 }
 
+/*
+ * Queue a PRCU callback to the current CPU for invocation
+ * after a grace period.
+ */
 void call_prcu(struct rcu_head *head, rcu_callback_t func)
 {
        unsigned long flags;
@@ -177,8 +275,12 @@ void call_prcu(struct rcu_head *head, rcu_callback_t func)
 
        debug_rcu_head_queue(head);
 
-       /* Use GFP_ATOMIC with IRQs disabled */
+       /* Use GFP_ATOMIC with IRQs disabled. */
        vhp = kmalloc(sizeof(struct prcu_version_head), GFP_ATOMIC);
+       /*
+        * Complain about kmalloc() failure.  This could be handled
+        * in a different way, e.g. return -1 to inform the caller.
+        */
        if (!vhp) {
                WARN_ON(1);
                return;
@@ -188,8 +290,13 @@ void call_prcu(struct rcu_head *head, rcu_callback_t func)
        head->next = NULL;
        vhp->next = NULL;
 
+       /* Disable IRQs to prevent races with prcu_process_callbacks(). */
        local_irq_save(flags);
        local = this_cpu_ptr(&prcu_local);
+       /*
+        * Assign the CPU-local callback version to the given callback
+        * and add it to the PRCU callback list of the current CPU.
+        */
        vhp->version = local->version;
        rclp = &local->cblist;
        rclp->len++;
@@ -201,6 +308,13 @@ void call_prcu(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL(call_prcu);
 
+/*
+ * Check to see if there is any immediate PRCU-related work
+ * to be done by the current CPU, returning 1 if so.
+ *
+ * Currently, it only checks whether this CPU has callbacks
+ * that are ready to invoke.
+ */
 int prcu_pending(void)
 {
        struct prcu_local_struct *local = get_cpu_ptr(&prcu_local);
@@ -211,18 +325,33 @@ int prcu_pending(void)
        return cb_version < atomic64_read(&prcu->cb_version) && rclp->head;
 }
 
+/*
+ * Perform PRCU core processing for the current CPU using softirq.
+ */
 void invoke_prcu_core(void)
 {
        if (cpu_online(smp_processor_id()))
                raise_softirq(PRCU_SOFTIRQ);
 }
 
+/*
+ * Schedule PRCU core processing.
+ *
+ * This function must be called from hardirq context.
+ * It is normally invoked from the scheduling-clock interrupt.
+ */
 void prcu_check_callbacks(void)
 {
        if (prcu_pending())
                invoke_prcu_core();
 }
 
+/*
+ * Process PRCU callbacks whose grace period has completed.
+ * Do this using softirq for each CPU.
+ *
+ * Also see the prcu_barrier() comment header.
+ */
 static __latent_entropy void prcu_process_callbacks(struct softirq_action *unused)
 {
        unsigned long flags;
@@ -237,18 +366,24 @@ static __latent_entropy void prcu_process_callbacks(struct softirq_action *unuse
 
        cb_version = atomic64_read(&prcu->cb_version);
 
-       /* Disable interrupts to prevent races with call_prcu() */
+       /* Disable IRQs to prevent races with call_prcu(). */
        local_irq_save(flags);
        local = this_cpu_ptr(&prcu_local);
        rclp = &local->cblist;
        rhp = rclp->head;
        vhp = rclp->version_head;
+       /*
+        * Process PRCU callbacks with version number smaller
+        * than the global PRCU callback version whose associated
+        * grace periods have completed.
+        */
        for (; rhp && vhp && vhp->version < cb_version;
             rhp = rclp->head, vhp = rclp->version_head) {
                rhp = prcu_cblist_dequeue(rclp);
                debug_rcu_head_unqueue(rhp);
                rhp->func(rhp);
        }
+       /* Record the version number of callbacks to be processed. */
        local->cb_version = cb_version;
        local_irq_restore(flags);
 }
@@ -274,7 +409,18 @@ static void prcu_barrier_func(void *info)
        call_prcu(&local->barrier_head, prcu_barrier_callback);
 }
 
-/* Waiting for all PRCU callbacks to complete. */
+/*
+ * Waiting for all PRCU callbacks to complete.
+ *
+ * NOTE: The current PRCU implementation relies on synchronize_prcu()
+ * to update its global grace-period and callback version numbers.
+ * If there is no synchronize_prcu() running and call_prcu() is called,
+ * prcu_process_callbacks() won't make progress and prcu_barrier() will
+ * -not- return.
+ *
+ * This needs to be fixed, e.g. using a grace-period expediting mechanism
+ * as found in the Linux-kernel RCU implementation.
+ */
 void prcu_barrier(void)
 {
        int cpu;
@@ -292,9 +438,13 @@ void prcu_barrier(void)
 
        /*
         * Register a new callback on each CPU using IPI to prevent races
-        * with call_prcu(). When that callback is invoked, we will know
+        * with call_prcu().  When that callback is invoked, we will know
         * that all of the corresponding CPU's preceding callbacks have
-        * been invoked.
+        * been invoked.  Note that we must use the wait version of
+        * smp_call_function_single().  Otherwise prcu_barrier_func()
+        * might not finish incrementing prcu->barrier_cpu_count and
+        * registering prcu_barrier_callback() on -each- CPU before
+        * we exit the loop and wait for completion. Hence a bug!
         */
        for_each_possible_cpu(cpu)
                smp_call_function_single(cpu, prcu_barrier_func, NULL, 1);
@@ -315,6 +465,9 @@ void prcu_barrier(void)
 }
 EXPORT_SYMBOL(prcu_barrier);
 
+/*
+ * Helper function for prcu_init() to initialize PRCU's CPU-local structure.
+ */
 void prcu_init_local_struct(int cpu)
 {
        struct prcu_local_struct *local;
@@ -327,6 +480,9 @@ void prcu_init_local_struct(int cpu)
        prcu_cblist_init(&local->cblist);
 }
 
+/*
+ * Initialize PRCU at boot time.
+ */
 void __init prcu_init(void)
 {
        int cpu;
-- 
2.14.1.729.g59c0ea183

Reply via email to