On Monday, September 17, 2012, Tejun Heo wrote:
> powernowk8_target() runs off a per-cpu work item and if the
> cpufreq_policy->cpu is different from the current one, it migrates the
> kworker to the target CPU by manipulating current->cpus_allowed.  The
> function migrates the kworker back to the original CPU but this is
> still broken.  Workqueue concurrency management requires the kworkers
> to stay on the same CPU and powernowk8_target() ends up triggerring
> BUG_ON(rq != this_rq()) in try_to_wake_up_local() if it contends on
> fidvid_mutex and sleeps.
> 
> It is unclear why this bug is being reported now.  Duncan says it
> appeared to be a regression of 3.6-rc1 and couldn't reproduce it on
> 3.5.  Bisection seemed to point to 63d95a91 "workqueue: use @pool
> instead of @gcwq or @cpu where applicable" which is an non-functional
> change.  Given that the reproduce case sometimes took upto days to
> trigger, it's easy to be misled while bisecting.  Maybe something made
> contention on fidvid_mutex more likely?  I don't know.
> 
> This patch fixes the bug by punting to another per-cpu work item on
> the target CPU if it isn't the same as the current one.  The code
> assumes that cpufreq_policy->cpu is kept online by the caller, which
> Rafael tells me is the case.
> 
> Signed-off-by: Tejun Heo <[email protected]>
> Reported-by: Duncan <[email protected]>
> Cc: Rafael J. Wysocki <[email protected]>
> Cc: Andreas Herrmann <[email protected]>
> Cc: [email protected]
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=47301
> ---
> 
> While it's very late in the merge cycle, the fix is limited in scope
> and fairly safe, so it wouldn't be too crazy to merge but then again
> this can go through the next -rc1 and then -stable.  Linus, Rafael,
> what do you guys think?

Well, I don't see much reason to wait with this, although I'd like some
more people to check it.

Andre, Thomas, can you please have a look at it?

Rafael


>  drivers/cpufreq/powernow-k8.c |   89 
> +++++++++++++++++++++++++-----------------
>  1 file changed, 53 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
> index c0e8164..53db9de 100644
> --- a/drivers/cpufreq/powernow-k8.c
> +++ b/drivers/cpufreq/powernow-k8.c
> @@ -35,7 +35,6 @@
>  #include <linux/slab.h>
>  #include <linux/string.h>
>  #include <linux/cpumask.h>
> -#include <linux/sched.h>     /* for current / set_cpus_allowed() */
>  #include <linux/io.h>
>  #include <linux/delay.h>
>  
> @@ -1139,46 +1138,43 @@ static int transition_frequency_pstate(struct 
> powernow_k8_data *data,
>       return res;
>  }
>  
> -/* Driver entry point to switch to the target frequency */
> -static int powernowk8_target(struct cpufreq_policy *pol,
> -             unsigned targfreq, unsigned relation)
> +struct powernowk8_target_work {
> +     struct work_struct              work;
> +     struct cpufreq_policy           *pol;
> +     unsigned                        targfreq;
> +     unsigned                        relation;
> +     int                             ret;
> +};
> +
> +static void powernowk8_target_on_cpu(struct work_struct *work)
>  {
> -     cpumask_var_t oldmask;
> +     struct powernowk8_target_work *tw =
> +             container_of(work, struct powernowk8_target_work, work);
> +     struct cpufreq_policy *pol = tw->pol;
>       struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
>       u32 checkfid;
>       u32 checkvid;
>       unsigned int newstate;
> -     int ret = -EIO;
>  
> +     tw->ret = -EINVAL;
>       if (!data)
> -             return -EINVAL;
> +             return;
> +
> +     tw->ret = -EIO;
>  
>       checkfid = data->currfid;
>       checkvid = data->currvid;
>  
> -     /* only run on specific CPU from here on. */
> -     /* This is poor form: use a workqueue or smp_call_function_single */
> -     if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
> -             return -ENOMEM;
> -
> -     cpumask_copy(oldmask, tsk_cpus_allowed(current));
> -     set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
> -
> -     if (smp_processor_id() != pol->cpu) {
> -             printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
> -             goto err_out;
> -     }
> -
>       if (pending_bit_stuck()) {
>               printk(KERN_ERR PFX "failing targ, change pending bit set\n");
> -             goto err_out;
> +             return;
>       }
>  
>       pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
> -             pol->cpu, targfreq, pol->min, pol->max, relation);
> +             pol->cpu, tw->targfreq, pol->min, pol->max, tw->relation);
>  
>       if (query_current_values_with_pending_wait(data))
> -             goto err_out;
> +             return;
>  
>       if (cpu_family != CPU_HW_PSTATE) {
>               pr_debug("targ: curr fid 0x%x, vid 0x%x\n",
> @@ -1195,23 +1191,23 @@ static int powernowk8_target(struct cpufreq_policy 
> *pol,
>       }
>  
>       if (cpufreq_frequency_table_target(pol, data->powernow_table,
> -                             targfreq, relation, &newstate))
> -             goto err_out;
> +                             tw->targfreq, tw->relation, &newstate))
> +             return;
>  
>       mutex_lock(&fidvid_mutex);
>  
>       powernow_k8_acpi_pst_values(data, newstate);
>  
>       if (cpu_family == CPU_HW_PSTATE)
> -             ret = transition_frequency_pstate(data,
> -                     data->powernow_table[newstate].index);
> +             tw->ret = transition_frequency_pstate(data,
> +                             data->powernow_table[newstate].index);
>       else
> -             ret = transition_frequency_fidvid(data, newstate);
> -     if (ret) {
> +             tw->ret = transition_frequency_fidvid(data, newstate);
> +     if (tw->ret) {
>               printk(KERN_ERR PFX "transition frequency failed\n");
> -             ret = 1;
> +             tw->ret = 1;
>               mutex_unlock(&fidvid_mutex);
> -             goto err_out;
> +             return;
>       }
>       mutex_unlock(&fidvid_mutex);
>  
> @@ -1220,12 +1216,33 @@ static int powernowk8_target(struct cpufreq_policy 
> *pol,
>                               data->powernow_table[newstate].index);
>       else
>               pol->cur = find_khz_freq_from_fid(data->currfid);
> -     ret = 0;
>  
> -err_out:
> -     set_cpus_allowed_ptr(current, oldmask);
> -     free_cpumask_var(oldmask);
> -     return ret;
> +     tw->ret = 0;
> +}
> +
> +/* Driver entry point to switch to the target frequency */
> +static int powernowk8_target(struct cpufreq_policy *pol,
> +             unsigned targfreq, unsigned relation)
> +{
> +     struct powernowk8_target_work tw;
> +
> +     /*
> +      * Must run on @pol->cpu.  Bounce to workqueue if necessary.
> +      * cpufreq core is responsible for ensuring the cpu stays online.
> +      */
> +     INIT_WORK_ONSTACK(&tw.work, powernowk8_target_on_cpu);
> +     tw.pol = pol;
> +     tw.targfreq = targfreq;
> +     tw.relation = relation;
> +
> +     if (smp_processor_id() == pol->cpu) {
> +             powernowk8_target_on_cpu(&tw.work);
> +     } else {
> +             schedule_work_on(pol->cpu, &tw.work);
> +             flush_work(&tw.work);
> +     }
> +
> +     return tw.ret;
>  }
>  
>  /* Driver entry point to verify the policy and range of frequencies */
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to